Home | History | Annotate | Line # | Download | only in raidframe
rf_netbsdkintf.c revision 1.128
      1 /*	$NetBSD: rf_netbsdkintf.c,v 1.128 2002/08/04 03:27:04 oster Exp $	*/
      2 /*-
      3  * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc.
      4  * All rights reserved.
      5  *
      6  * This code is derived from software contributed to The NetBSD Foundation
      7  * by Greg Oster; Jason R. Thorpe.
      8  *
      9  * Redistribution and use in source and binary forms, with or without
     10  * modification, are permitted provided that the following conditions
     11  * are met:
     12  * 1. Redistributions of source code must retain the above copyright
     13  *    notice, this list of conditions and the following disclaimer.
     14  * 2. Redistributions in binary form must reproduce the above copyright
     15  *    notice, this list of conditions and the following disclaimer in the
     16  *    documentation and/or other materials provided with the distribution.
     17  * 3. All advertising materials mentioning features or use of this software
     18  *    must display the following acknowledgement:
     19  *        This product includes software developed by the NetBSD
     20  *        Foundation, Inc. and its contributors.
     21  * 4. Neither the name of The NetBSD Foundation nor the names of its
     22  *    contributors may be used to endorse or promote products derived
     23  *    from this software without specific prior written permission.
     24  *
     25  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     26  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     27  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     28  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     29  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     30  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     31  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     32  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     33  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     34  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     35  * POSSIBILITY OF SUCH DAMAGE.
     36  */
     37 
     38 /*
     39  * Copyright (c) 1988 University of Utah.
     40  * Copyright (c) 1990, 1993
     41  *      The Regents of the University of California.  All rights reserved.
     42  *
     43  * This code is derived from software contributed to Berkeley by
     44  * the Systems Programming Group of the University of Utah Computer
     45  * Science Department.
     46  *
     47  * Redistribution and use in source and binary forms, with or without
     48  * modification, are permitted provided that the following conditions
     49  * are met:
     50  * 1. Redistributions of source code must retain the above copyright
     51  *    notice, this list of conditions and the following disclaimer.
     52  * 2. Redistributions in binary form must reproduce the above copyright
     53  *    notice, this list of conditions and the following disclaimer in the
     54  *    documentation and/or other materials provided with the distribution.
     55  * 3. All advertising materials mentioning features or use of this software
     56  *    must display the following acknowledgement:
     57  *      This product includes software developed by the University of
     58  *      California, Berkeley and its contributors.
     59  * 4. Neither the name of the University nor the names of its contributors
     60  *    may be used to endorse or promote products derived from this software
     61  *    without specific prior written permission.
     62  *
     63  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     64  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     65  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     66  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     67  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     68  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     69  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     70  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     71  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     72  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     73  * SUCH DAMAGE.
     74  *
     75  * from: Utah $Hdr: cd.c 1.6 90/11/28$
     76  *
     77  *      @(#)cd.c        8.2 (Berkeley) 11/16/93
     78  */
     79 
     80 
     81 
     82 
     83 /*
     84  * Copyright (c) 1995 Carnegie-Mellon University.
     85  * All rights reserved.
     86  *
     87  * Authors: Mark Holland, Jim Zelenka
     88  *
     89  * Permission to use, copy, modify and distribute this software and
     90  * its documentation is hereby granted, provided that both the copyright
     91  * notice and this permission notice appear in all copies of the
     92  * software, derivative works or modified versions, and any portions
     93  * thereof, and that both notices appear in supporting documentation.
     94  *
     95  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
     96  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
     97  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
     98  *
     99  * Carnegie Mellon requests users of this software to return to
    100  *
    101  *  Software Distribution Coordinator  or  Software.Distribution (at) CS.CMU.EDU
    102  *  School of Computer Science
    103  *  Carnegie Mellon University
    104  *  Pittsburgh PA 15213-3890
    105  *
    106  * any improvements or extensions that they make and grant Carnegie the
    107  * rights to redistribute these changes.
    108  */
    109 
    110 /***********************************************************
    111  *
    112  * rf_kintf.c -- the kernel interface routines for RAIDframe
    113  *
    114  ***********************************************************/
    115 
    116 #include <sys/cdefs.h>
    117 __KERNEL_RCSID(0, "$NetBSD: rf_netbsdkintf.c,v 1.128 2002/08/04 03:27:04 oster Exp $");
    118 
    119 #include <sys/param.h>
    120 #include <sys/errno.h>
    121 #include <sys/pool.h>
    122 #include <sys/queue.h>
    123 #include <sys/disk.h>
    124 #include <sys/device.h>
    125 #include <sys/stat.h>
    126 #include <sys/ioctl.h>
    127 #include <sys/fcntl.h>
    128 #include <sys/systm.h>
    129 #include <sys/namei.h>
    130 #include <sys/vnode.h>
    131 #include <sys/disklabel.h>
    132 #include <sys/conf.h>
    133 #include <sys/lock.h>
    134 #include <sys/buf.h>
    135 #include <sys/user.h>
    136 #include <sys/reboot.h>
    137 
    138 #include <dev/raidframe/raidframevar.h>
    139 #include <dev/raidframe/raidframeio.h>
    140 #include "raid.h"
    141 #include "opt_raid_autoconfig.h"
    142 #include "rf_raid.h"
    143 #include "rf_copyback.h"
    144 #include "rf_dag.h"
    145 #include "rf_dagflags.h"
    146 #include "rf_desc.h"
    147 #include "rf_diskqueue.h"
    148 #include "rf_etimer.h"
    149 #include "rf_general.h"
    150 #include "rf_kintf.h"
    151 #include "rf_options.h"
    152 #include "rf_driver.h"
    153 #include "rf_parityscan.h"
    154 #include "rf_threadstuff.h"
    155 
    156 int     rf_kdebug_level = 0;
    157 
/*
 * db1_printf((fmt, ...)) -- level-1 debug trace.
 *
 * The argument is a complete, parenthesized printf argument list.  With
 * DEBUG defined the message is printed only while rf_kdebug_level > 0;
 * without DEBUG the call expands to nothing.
 *
 * Both forms are wrapped in do { } while (0) so that the macro behaves as
 * a single statement: it can be used as the body of an unbraced if/else
 * without capturing a following "else" (DEBUG form) or producing a stray
 * empty statement that orphans the "else" (non-DEBUG form).
 */
#ifdef DEBUG
#define db1_printf(a) do { if (rf_kdebug_level > 0) printf a; } while (0)
#else				/* DEBUG */
#define db1_printf(a) do { } while (0)
#endif				/* DEBUG */
    163 
    164 static RF_Raid_t **raidPtrs;	/* global raid device descriptors */
    165 
    166 RF_DECLARE_STATIC_MUTEX(rf_sparet_wait_mutex)
    167 
    168 static RF_SparetWait_t *rf_sparet_wait_queue;	/* requests to install a
    169 						 * spare table */
    170 static RF_SparetWait_t *rf_sparet_resp_queue;	/* responses from
    171 						 * installation process */
    172 
    173 /* prototypes */
    174 static void KernelWakeupFunc(struct buf * bp);
    175 static void InitBP(struct buf * bp, struct vnode *, unsigned rw_flag,
    176 		   dev_t dev, RF_SectorNum_t startSect,
    177 		   RF_SectorCount_t numSect, caddr_t buf,
    178 		   void (*cbFunc) (struct buf *), void *cbArg,
    179 		   int logBytesPerSector, struct proc * b_proc);
    180 static void raidinit(RF_Raid_t *);
    181 
    182 void raidattach(int);
    183 int raidsize(dev_t);
    184 int raidopen(dev_t, int, int, struct proc *);
    185 int raidclose(dev_t, int, int, struct proc *);
    186 int raidioctl(dev_t, u_long, caddr_t, int, struct proc *);
    187 int raidwrite(dev_t, struct uio *, int);
    188 int raidread(dev_t, struct uio *, int);
    189 void raidstrategy(struct buf *);
    190 int raiddump(dev_t, daddr_t, caddr_t, size_t);
    191 
    192 /*
    193  * Pilfered from ccd.c
    194  */
    195 
    196 struct raidbuf {
    197 	struct buf rf_buf;	/* new I/O buf.  MUST BE FIRST!!! */
    198 	struct buf *rf_obp;	/* ptr. to original I/O buf */
    199 	int     rf_flags;	/* misc. flags */
    200 	RF_DiskQueueData_t *req;/* the request that this was part of.. */
    201 };
    202 
    203 /* component buffer pool */
    204 struct pool raidframe_cbufpool;
    205 
    206 #define RAIDGETBUF(rs) pool_get(&raidframe_cbufpool, PR_NOWAIT)
    207 #define	RAIDPUTBUF(rs, cbp) pool_put(&raidframe_cbufpool, cbp)
    208 
    209 /* XXX Not sure if the following should be replacing the raidPtrs above,
    210    or if it should be used in conjunction with that...
    211 */
    212 
    213 struct raid_softc {
    214 	int     sc_flags;	/* flags */
    215 	int     sc_cflags;	/* configuration flags */
    216 	size_t  sc_size;        /* size of the raid device */
    217 	char    sc_xname[20];	/* XXX external name */
    218 	struct disk sc_dkdev;	/* generic disk device info */
    219 	struct bufq_state buf_queue;	/* used for the device queue */
    220 };
    221 /* sc_flags */
    222 #define RAIDF_INITED	0x01	/* unit has been initialized */
    223 #define RAIDF_WLABEL	0x02	/* label area is writable */
    224 #define RAIDF_LABELLING	0x04	/* unit is currently being labelled */
    225 #define RAIDF_WANTED	0x40	/* someone is waiting to obtain a lock */
    226 #define RAIDF_LOCKED	0x80	/* unit is locked */
    227 
    228 #define	raidunit(x)	DISKUNIT(x)
    229 int numraid = 0;
    230 
    231 /*
    232  * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
    233  * Be aware that large numbers can allow the driver to consume a lot of
    234  * kernel memory, especially on writes, and in degraded mode reads.
    235  *
    236  * For example: with a stripe width of 64 blocks (32k) and 5 disks,
    237  * a single 64K write will typically require 64K for the old data,
    238  * 64K for the old parity, and 64K for the new parity, for a total
    239  * of 192K (if the parity buffer is not re-used immediately).
    240  * Even if it is used immediately, that's still 128K, which when multiplied
    241  * by say 10 requests, is 1280K, *on top* of the 640K of incoming data.
    242  *
    243  * Now in degraded mode, for example, a 64K read on the above setup may
    244  * require data reconstruction, which will require *all* of the 4 remaining
    245  * disks to participate -- 4 * 32K/disk == 128K again.
    246  */
    247 
/* Default number of simultaneous I/Os per RAID device; may be overridden. */
#ifndef RAIDOUTSTANDING
#define RAIDOUTSTANDING   6
#endif

/* dev_t of the raw partition of the unit that "dev" belongs to. */
#define RAIDLABELDEV(dev)	\
	(MAKEDISKDEV(major(dev), raidunit(dev), RAW_PART))

/* Array of per-unit state; declared here, and public, for KVM's benefit. */
struct raid_softc *raid_softc;
    257 
    258 static void raidgetdefaultlabel(RF_Raid_t *, struct raid_softc *,
    259 				     struct disklabel *);
    260 static void raidgetdisklabel(dev_t);
    261 static void raidmakedisklabel(struct raid_softc *);
    262 
    263 static int raidlock(struct raid_softc *);
    264 static void raidunlock(struct raid_softc *);
    265 
    266 static void rf_markalldirty(RF_Raid_t *);
    267 void rf_mountroot_hook(struct device *);
    268 
    269 struct device *raidrootdev;
    270 
    271 void rf_ReconThread(struct rf_recon_req *);
    272 /* XXX what I want is: */
    273 /*void rf_ReconThread(RF_Raid_t *raidPtr);  */
    274 void rf_RewriteParityThread(RF_Raid_t *raidPtr);
    275 void rf_CopybackThread(RF_Raid_t *raidPtr);
    276 void rf_ReconstructInPlaceThread(struct rf_recon_req *);
    277 void rf_buildroothack(void *);
    278 
    279 RF_AutoConfig_t *rf_find_raid_components(void);
    280 RF_ConfigSet_t *rf_create_auto_sets(RF_AutoConfig_t *);
    281 static int rf_does_it_fit(RF_ConfigSet_t *,RF_AutoConfig_t *);
    282 static int rf_reasonable_label(RF_ComponentLabel_t *);
    283 void rf_create_configuration(RF_AutoConfig_t *,RF_Config_t *, RF_Raid_t *);
    284 int rf_set_autoconfig(RF_Raid_t *, int);
    285 int rf_set_rootpartition(RF_Raid_t *, int);
    286 void rf_release_all_vps(RF_ConfigSet_t *);
    287 void rf_cleanup_config_set(RF_ConfigSet_t *);
    288 int rf_have_enough_components(RF_ConfigSet_t *);
    289 int rf_auto_config_set(RF_ConfigSet_t *, int *);
    290 
    291 static int raidautoconfig = 0; /* Debugging, mostly.  Set to 0 to not
    292 				  allow autoconfig to take place.
    293 			          Note that this is overridden by having
    294 			          RAID_AUTOCONFIG as an option in the
    295 			          kernel config file.  */
    296 
    297 void
    298 raidattach(num)
    299 	int     num;
    300 {
    301 	int raidID;
    302 	int i, rc;
    303 	RF_AutoConfig_t *ac_list; /* autoconfig list */
    304 	RF_ConfigSet_t *config_sets;
    305 
    306 #ifdef DEBUG
    307 	printf("raidattach: Asked for %d units\n", num);
    308 #endif
    309 
    310 	if (num <= 0) {
    311 #ifdef DIAGNOSTIC
    312 		panic("raidattach: count <= 0");
    313 #endif
    314 		return;
    315 	}
    316 	/* This is where all the initialization stuff gets done. */
    317 
    318 	numraid = num;
    319 
    320 	/* Make some space for requested number of units... */
    321 
    322 	RF_Calloc(raidPtrs, num, sizeof(RF_Raid_t *), (RF_Raid_t **));
    323 	if (raidPtrs == NULL) {
    324 		panic("raidPtrs is NULL!!\n");
    325 	}
    326 
    327 	/* Initialize the component buffer pool. */
    328 	pool_init(&raidframe_cbufpool, sizeof(struct raidbuf), 0,
    329 	    0, 0, "raidpl", NULL);
    330 
    331 	rc = rf_mutex_init(&rf_sparet_wait_mutex);
    332 	if (rc) {
    333 		RF_PANIC();
    334 	}
    335 
    336 	rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
    337 
    338 	for (i = 0; i < num; i++)
    339 		raidPtrs[i] = NULL;
    340 	rc = rf_BootRaidframe();
    341 	if (rc == 0)
    342 		printf("Kernelized RAIDframe activated\n");
    343 	else
    344 		panic("Serious error booting RAID!!\n");
    345 
    346 	/* put together some datastructures like the CCD device does.. This
    347 	 * lets us lock the device and what-not when it gets opened. */
    348 
    349 	raid_softc = (struct raid_softc *)
    350 		malloc(num * sizeof(struct raid_softc),
    351 		       M_RAIDFRAME, M_NOWAIT);
    352 	if (raid_softc == NULL) {
    353 		printf("WARNING: no memory for RAIDframe driver\n");
    354 		return;
    355 	}
    356 
    357 	memset(raid_softc, 0, num * sizeof(struct raid_softc));
    358 
    359 	raidrootdev = (struct device *)malloc(num * sizeof(struct device),
    360 					      M_RAIDFRAME, M_NOWAIT);
    361 	if (raidrootdev == NULL) {
    362 		panic("No memory for RAIDframe driver!!?!?!\n");
    363 	}
    364 
    365 	for (raidID = 0; raidID < num; raidID++) {
    366 		bufq_alloc(&raid_softc[raidID].buf_queue, BUFQ_FCFS);
    367 
    368 		raidrootdev[raidID].dv_class  = DV_DISK;
    369 		raidrootdev[raidID].dv_cfdata = NULL;
    370 		raidrootdev[raidID].dv_unit   = raidID;
    371 		raidrootdev[raidID].dv_parent = NULL;
    372 		raidrootdev[raidID].dv_flags  = 0;
    373 		sprintf(raidrootdev[raidID].dv_xname,"raid%d",raidID);
    374 
    375 		RF_Calloc(raidPtrs[raidID], 1, sizeof(RF_Raid_t),
    376 			  (RF_Raid_t *));
    377 		if (raidPtrs[raidID] == NULL) {
    378 			printf("WARNING: raidPtrs[%d] is NULL\n", raidID);
    379 			numraid = raidID;
    380 			return;
    381 		}
    382 	}
    383 
    384 #ifdef RAID_AUTOCONFIG
    385 	raidautoconfig = 1;
    386 #endif
    387 
    388 if (raidautoconfig) {
    389 	/* 1. locate all RAID components on the system */
    390 
    391 #if DEBUG
    392 	printf("Searching for raid components...\n");
    393 #endif
    394 	ac_list = rf_find_raid_components();
    395 
    396 	/* 2. sort them into their respective sets */
    397 
    398 	config_sets = rf_create_auto_sets(ac_list);
    399 
    400 	/* 3. evaluate each set and configure the valid ones
    401 	   This gets done in rf_buildroothack() */
    402 
    403 	/* schedule the creation of the thread to do the
    404 	   "/ on RAID" stuff */
    405 
    406 	kthread_create(rf_buildroothack,config_sets);
    407 
    408 #if 0
    409 	mountroothook_establish(rf_mountroot_hook, &raidrootdev[0]);
    410 #endif
    411 }
    412 
    413 }
    414 
    415 void
    416 rf_buildroothack(arg)
    417 	void *arg;
    418 {
    419 	RF_ConfigSet_t *config_sets = arg;
    420 	RF_ConfigSet_t *cset;
    421 	RF_ConfigSet_t *next_cset;
    422 	int retcode;
    423 	int raidID;
    424 	int rootID;
    425 	int num_root;
    426 
    427 	rootID = 0;
    428 	num_root = 0;
    429 	cset = config_sets;
    430 	while(cset != NULL ) {
    431 		next_cset = cset->next;
    432 		if (rf_have_enough_components(cset) &&
    433 		    cset->ac->clabel->autoconfigure==1) {
    434 			retcode = rf_auto_config_set(cset,&raidID);
    435 			if (!retcode) {
    436 				if (cset->rootable) {
    437 					rootID = raidID;
    438 					num_root++;
    439 				}
    440 			} else {
    441 				/* The autoconfig didn't work :( */
    442 #if DEBUG
    443 				printf("Autoconfig failed with code %d for raid%d\n", retcode, raidID);
    444 #endif
    445 				rf_release_all_vps(cset);
    446 			}
    447 		} else {
    448 			/* we're not autoconfiguring this set...
    449 			   release the associated resources */
    450 			rf_release_all_vps(cset);
    451 		}
    452 		/* cleanup */
    453 		rf_cleanup_config_set(cset);
    454 		cset = next_cset;
    455 	}
    456 
    457 	/* we found something bootable... */
    458 
    459 	if (num_root == 1) {
    460 		booted_device = &raidrootdev[rootID];
    461 	} else if (num_root > 1) {
    462 		/* we can't guess.. require the user to answer... */
    463 		boothowto |= RB_ASKNAME;
    464 	}
    465 }
    466 
    467 
    468 int
    469 raidsize(dev)
    470 	dev_t   dev;
    471 {
    472 	struct raid_softc *rs;
    473 	struct disklabel *lp;
    474 	int     part, unit, omask, size;
    475 
    476 	unit = raidunit(dev);
    477 	if (unit >= numraid)
    478 		return (-1);
    479 	rs = &raid_softc[unit];
    480 
    481 	if ((rs->sc_flags & RAIDF_INITED) == 0)
    482 		return (-1);
    483 
    484 	part = DISKPART(dev);
    485 	omask = rs->sc_dkdev.dk_openmask & (1 << part);
    486 	lp = rs->sc_dkdev.dk_label;
    487 
    488 	if (omask == 0 && raidopen(dev, 0, S_IFBLK, curproc))
    489 		return (-1);
    490 
    491 	if (lp->d_partitions[part].p_fstype != FS_SWAP)
    492 		size = -1;
    493 	else
    494 		size = lp->d_partitions[part].p_size *
    495 		    (lp->d_secsize / DEV_BSIZE);
    496 
    497 	if (omask == 0 && raidclose(dev, 0, S_IFBLK, curproc))
    498 		return (-1);
    499 
    500 	return (size);
    501 
    502 }
    503 
    504 int
    505 raiddump(dev, blkno, va, size)
    506 	dev_t   dev;
    507 	daddr_t blkno;
    508 	caddr_t va;
    509 	size_t  size;
    510 {
    511 	/* Not implemented. */
    512 	return ENXIO;
    513 }
    514 /* ARGSUSED */
    515 int
    516 raidopen(dev, flags, fmt, p)
    517 	dev_t   dev;
    518 	int     flags, fmt;
    519 	struct proc *p;
    520 {
    521 	int     unit = raidunit(dev);
    522 	struct raid_softc *rs;
    523 	struct disklabel *lp;
    524 	int     part, pmask;
    525 	int     error = 0;
    526 
    527 	if (unit >= numraid)
    528 		return (ENXIO);
    529 	rs = &raid_softc[unit];
    530 
    531 	if ((error = raidlock(rs)) != 0)
    532 		return (error);
    533 	lp = rs->sc_dkdev.dk_label;
    534 
    535 	part = DISKPART(dev);
    536 	pmask = (1 << part);
    537 
    538 	db1_printf(("Opening raid device number: %d partition: %d\n",
    539 		unit, part));
    540 
    541 
    542 	if ((rs->sc_flags & RAIDF_INITED) &&
    543 	    (rs->sc_dkdev.dk_openmask == 0))
    544 		raidgetdisklabel(dev);
    545 
    546 	/* make sure that this partition exists */
    547 
    548 	if (part != RAW_PART) {
    549 		db1_printf(("Not a raw partition..\n"));
    550 		if (((rs->sc_flags & RAIDF_INITED) == 0) ||
    551 		    ((part >= lp->d_npartitions) ||
    552 			(lp->d_partitions[part].p_fstype == FS_UNUSED))) {
    553 			error = ENXIO;
    554 			raidunlock(rs);
    555 			db1_printf(("Bailing out...\n"));
    556 			return (error);
    557 		}
    558 	}
    559 	/* Prevent this unit from being unconfigured while open. */
    560 	switch (fmt) {
    561 	case S_IFCHR:
    562 		rs->sc_dkdev.dk_copenmask |= pmask;
    563 		break;
    564 
    565 	case S_IFBLK:
    566 		rs->sc_dkdev.dk_bopenmask |= pmask;
    567 		break;
    568 	}
    569 
    570 	if ((rs->sc_dkdev.dk_openmask == 0) &&
    571 	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
    572 		/* First one... mark things as dirty... Note that we *MUST*
    573 		 have done a configure before this.  I DO NOT WANT TO BE
    574 		 SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
    575 		 THAT THEY BELONG TOGETHER!!!!! */
    576 		/* XXX should check to see if we're only open for reading
    577 		   here... If so, we needn't do this, but then need some
    578 		   other way of keeping track of what's happened.. */
    579 
    580 		rf_markalldirty( raidPtrs[unit] );
    581 	}
    582 
    583 
    584 	rs->sc_dkdev.dk_openmask =
    585 	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
    586 
    587 	raidunlock(rs);
    588 
    589 	return (error);
    590 
    591 
    592 }
    593 /* ARGSUSED */
    594 int
    595 raidclose(dev, flags, fmt, p)
    596 	dev_t   dev;
    597 	int     flags, fmt;
    598 	struct proc *p;
    599 {
    600 	int     unit = raidunit(dev);
    601 	struct raid_softc *rs;
    602 	int     error = 0;
    603 	int     part;
    604 
    605 	if (unit >= numraid)
    606 		return (ENXIO);
    607 	rs = &raid_softc[unit];
    608 
    609 	if ((error = raidlock(rs)) != 0)
    610 		return (error);
    611 
    612 	part = DISKPART(dev);
    613 
    614 	/* ...that much closer to allowing unconfiguration... */
    615 	switch (fmt) {
    616 	case S_IFCHR:
    617 		rs->sc_dkdev.dk_copenmask &= ~(1 << part);
    618 		break;
    619 
    620 	case S_IFBLK:
    621 		rs->sc_dkdev.dk_bopenmask &= ~(1 << part);
    622 		break;
    623 	}
    624 	rs->sc_dkdev.dk_openmask =
    625 	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
    626 
    627 	if ((rs->sc_dkdev.dk_openmask == 0) &&
    628 	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
    629 		/* Last one... device is not unconfigured yet.
    630 		   Device shutdown has taken care of setting the
    631 		   clean bits if RAIDF_INITED is not set
    632 		   mark things as clean... */
    633 #if 0
    634 		printf("Last one on raid%d.  Updating status.\n",unit);
    635 #endif
    636 		rf_update_component_labels(raidPtrs[unit],
    637 						 RF_FINAL_COMPONENT_UPDATE);
    638 		if (doing_shutdown) {
    639 			/* last one, and we're going down, so
    640 			   lights out for this RAID set too. */
    641 			error = rf_Shutdown(raidPtrs[unit]);
    642 
    643 			/* It's no longer initialized... */
    644 			rs->sc_flags &= ~RAIDF_INITED;
    645 
    646 			/* Detach the disk. */
    647 			disk_detach(&rs->sc_dkdev);
    648 		}
    649 	}
    650 
    651 	raidunlock(rs);
    652 	return (0);
    653 
    654 }
    655 
    656 void
    657 raidstrategy(bp)
    658 	struct buf *bp;
    659 {
    660 	int s;
    661 
    662 	unsigned int raidID = raidunit(bp->b_dev);
    663 	RF_Raid_t *raidPtr;
    664 	struct raid_softc *rs = &raid_softc[raidID];
    665 	struct disklabel *lp;
    666 	int     wlabel;
    667 
    668 	if ((rs->sc_flags & RAIDF_INITED) ==0) {
    669 		bp->b_error = ENXIO;
    670 		bp->b_flags |= B_ERROR;
    671 		bp->b_resid = bp->b_bcount;
    672 		biodone(bp);
    673 		return;
    674 	}
    675 	if (raidID >= numraid || !raidPtrs[raidID]) {
    676 		bp->b_error = ENODEV;
    677 		bp->b_flags |= B_ERROR;
    678 		bp->b_resid = bp->b_bcount;
    679 		biodone(bp);
    680 		return;
    681 	}
    682 	raidPtr = raidPtrs[raidID];
    683 	if (!raidPtr->valid) {
    684 		bp->b_error = ENODEV;
    685 		bp->b_flags |= B_ERROR;
    686 		bp->b_resid = bp->b_bcount;
    687 		biodone(bp);
    688 		return;
    689 	}
    690 	if (bp->b_bcount == 0) {
    691 		db1_printf(("b_bcount is zero..\n"));
    692 		biodone(bp);
    693 		return;
    694 	}
    695 	lp = rs->sc_dkdev.dk_label;
    696 
    697 	/*
    698 	 * Do bounds checking and adjust transfer.  If there's an
    699 	 * error, the bounds check will flag that for us.
    700 	 */
    701 
    702 	wlabel = rs->sc_flags & (RAIDF_WLABEL | RAIDF_LABELLING);
    703 	if (DISKPART(bp->b_dev) != RAW_PART)
    704 		if (bounds_check_with_label(bp, lp, wlabel) <= 0) {
    705 			db1_printf(("Bounds check failed!!:%d %d\n",
    706 				(int) bp->b_blkno, (int) wlabel));
    707 			biodone(bp);
    708 			return;
    709 		}
    710 	s = splbio();
    711 
    712 	bp->b_resid = 0;
    713 
    714 	/* stuff it onto our queue */
    715 	BUFQ_PUT(&rs->buf_queue, bp);
    716 
    717 	raidstart(raidPtrs[raidID]);
    718 
    719 	splx(s);
    720 }
    721 /* ARGSUSED */
    722 int
    723 raidread(dev, uio, flags)
    724 	dev_t   dev;
    725 	struct uio *uio;
    726 	int     flags;
    727 {
    728 	int     unit = raidunit(dev);
    729 	struct raid_softc *rs;
    730 	int     part;
    731 
    732 	if (unit >= numraid)
    733 		return (ENXIO);
    734 	rs = &raid_softc[unit];
    735 
    736 	if ((rs->sc_flags & RAIDF_INITED) == 0)
    737 		return (ENXIO);
    738 	part = DISKPART(dev);
    739 
    740 	db1_printf(("raidread: unit: %d partition: %d\n", unit, part));
    741 
    742 	return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));
    743 
    744 }
    745 /* ARGSUSED */
    746 int
    747 raidwrite(dev, uio, flags)
    748 	dev_t   dev;
    749 	struct uio *uio;
    750 	int     flags;
    751 {
    752 	int     unit = raidunit(dev);
    753 	struct raid_softc *rs;
    754 
    755 	if (unit >= numraid)
    756 		return (ENXIO);
    757 	rs = &raid_softc[unit];
    758 
    759 	if ((rs->sc_flags & RAIDF_INITED) == 0)
    760 		return (ENXIO);
    761 	db1_printf(("raidwrite\n"));
    762 	return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));
    763 
    764 }
    765 
    766 int
    767 raidioctl(dev, cmd, data, flag, p)
    768 	dev_t   dev;
    769 	u_long  cmd;
    770 	caddr_t data;
    771 	int     flag;
    772 	struct proc *p;
    773 {
    774 	int     unit = raidunit(dev);
    775 	int     error = 0;
    776 	int     part, pmask;
    777 	struct raid_softc *rs;
    778 	RF_Config_t *k_cfg, *u_cfg;
    779 	RF_Raid_t *raidPtr;
    780 	RF_RaidDisk_t *diskPtr;
    781 	RF_AccTotals_t *totals;
    782 	RF_DeviceConfig_t *d_cfg, **ucfgp;
    783 	u_char *specific_buf;
    784 	int retcode = 0;
    785 	int row;
    786 	int column;
    787 	int raidid;
    788 	struct rf_recon_req *rrcopy, *rr;
    789 	RF_ComponentLabel_t *clabel;
    790 	RF_ComponentLabel_t ci_label;
    791 	RF_ComponentLabel_t **clabel_ptr;
    792 	RF_SingleComponent_t *sparePtr,*componentPtr;
    793 	RF_SingleComponent_t hot_spare;
    794 	RF_SingleComponent_t component;
    795 	RF_ProgressInfo_t progressInfo, **progressInfoPtr;
    796 	int i, j, d;
    797 #ifdef __HAVE_OLD_DISKLABEL
    798 	struct disklabel newlabel;
    799 #endif
    800 
    801 	if (unit >= numraid)
    802 		return (ENXIO);
    803 	rs = &raid_softc[unit];
    804 	raidPtr = raidPtrs[unit];
    805 
    806 	db1_printf(("raidioctl: %d %d %d %d\n", (int) dev,
    807 		(int) DISKPART(dev), (int) unit, (int) cmd));
    808 
    809 	/* Must be open for writes for these commands... */
    810 	switch (cmd) {
    811 	case DIOCSDINFO:
    812 	case DIOCWDINFO:
    813 #ifdef __HAVE_OLD_DISKLABEL
    814 	case ODIOCWDINFO:
    815 	case ODIOCSDINFO:
    816 #endif
    817 	case DIOCWLABEL:
    818 		if ((flag & FWRITE) == 0)
    819 			return (EBADF);
    820 	}
    821 
    822 	/* Must be initialized for these... */
    823 	switch (cmd) {
    824 	case DIOCGDINFO:
    825 	case DIOCSDINFO:
    826 	case DIOCWDINFO:
    827 #ifdef __HAVE_OLD_DISKLABEL
    828 	case ODIOCGDINFO:
    829 	case ODIOCWDINFO:
    830 	case ODIOCSDINFO:
    831 	case ODIOCGDEFLABEL:
    832 #endif
    833 	case DIOCGPART:
    834 	case DIOCWLABEL:
    835 	case DIOCGDEFLABEL:
    836 	case RAIDFRAME_SHUTDOWN:
    837 	case RAIDFRAME_REWRITEPARITY:
    838 	case RAIDFRAME_GET_INFO:
    839 	case RAIDFRAME_RESET_ACCTOTALS:
    840 	case RAIDFRAME_GET_ACCTOTALS:
    841 	case RAIDFRAME_KEEP_ACCTOTALS:
    842 	case RAIDFRAME_GET_SIZE:
    843 	case RAIDFRAME_FAIL_DISK:
    844 	case RAIDFRAME_COPYBACK:
    845 	case RAIDFRAME_CHECK_RECON_STATUS:
    846 	case RAIDFRAME_CHECK_RECON_STATUS_EXT:
    847 	case RAIDFRAME_GET_COMPONENT_LABEL:
    848 	case RAIDFRAME_SET_COMPONENT_LABEL:
    849 	case RAIDFRAME_ADD_HOT_SPARE:
    850 	case RAIDFRAME_REMOVE_HOT_SPARE:
    851 	case RAIDFRAME_INIT_LABELS:
    852 	case RAIDFRAME_REBUILD_IN_PLACE:
    853 	case RAIDFRAME_CHECK_PARITY:
    854 	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
    855 	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
    856 	case RAIDFRAME_CHECK_COPYBACK_STATUS:
    857 	case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
    858 	case RAIDFRAME_SET_AUTOCONFIG:
    859 	case RAIDFRAME_SET_ROOT:
    860 	case RAIDFRAME_DELETE_COMPONENT:
    861 	case RAIDFRAME_INCORPORATE_HOT_SPARE:
    862 		if ((rs->sc_flags & RAIDF_INITED) == 0)
    863 			return (ENXIO);
    864 	}
    865 
    866 	switch (cmd) {
    867 
    868 		/* configure the system */
    869 	case RAIDFRAME_CONFIGURE:
    870 
    871 		if (raidPtr->valid) {
    872 			/* There is a valid RAID set running on this unit! */
    873 			printf("raid%d: Device already configured!\n",unit);
    874 			return(EINVAL);
    875 		}
    876 
    877 		/* copy-in the configuration information */
    878 		/* data points to a pointer to the configuration structure */
    879 
    880 		u_cfg = *((RF_Config_t **) data);
    881 		RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *));
    882 		if (k_cfg == NULL) {
    883 			return (ENOMEM);
    884 		}
    885 		retcode = copyin((caddr_t) u_cfg, (caddr_t) k_cfg,
    886 		    sizeof(RF_Config_t));
    887 		if (retcode) {
    888 			RF_Free(k_cfg, sizeof(RF_Config_t));
    889 			db1_printf(("rf_ioctl: retcode=%d copyin.1\n",
    890 				retcode));
    891 			return (retcode);
    892 		}
    893 		/* allocate a buffer for the layout-specific data, and copy it
    894 		 * in */
    895 		if (k_cfg->layoutSpecificSize) {
    896 			if (k_cfg->layoutSpecificSize > 10000) {
    897 				/* sanity check */
    898 				RF_Free(k_cfg, sizeof(RF_Config_t));
    899 				return (EINVAL);
    900 			}
    901 			RF_Malloc(specific_buf, k_cfg->layoutSpecificSize,
    902 			    (u_char *));
    903 			if (specific_buf == NULL) {
    904 				RF_Free(k_cfg, sizeof(RF_Config_t));
    905 				return (ENOMEM);
    906 			}
    907 			retcode = copyin(k_cfg->layoutSpecific,
    908 			    (caddr_t) specific_buf,
    909 			    k_cfg->layoutSpecificSize);
    910 			if (retcode) {
    911 				RF_Free(k_cfg, sizeof(RF_Config_t));
    912 				RF_Free(specific_buf,
    913 					k_cfg->layoutSpecificSize);
    914 				db1_printf(("rf_ioctl: retcode=%d copyin.2\n",
    915 					retcode));
    916 				return (retcode);
    917 			}
    918 		} else
    919 			specific_buf = NULL;
    920 		k_cfg->layoutSpecific = specific_buf;
    921 
    922 		/* should do some kind of sanity check on the configuration.
    923 		 * Store the sum of all the bytes in the last byte? */
    924 
    925 		/* configure the system */
    926 
    927 		/*
    928 		 * Clear the entire RAID descriptor, just to make sure
    929 		 *  there is no stale data left in the case of a
    930 		 *  reconfiguration
    931 		 */
    932 		memset((char *) raidPtr, 0, sizeof(RF_Raid_t));
    933 		raidPtr->raidid = unit;
    934 
    935 		retcode = rf_Configure(raidPtr, k_cfg, NULL);
    936 
    937 		if (retcode == 0) {
    938 
    939 			/* allow this many simultaneous IO's to
    940 			   this RAID device */
    941 			raidPtr->openings = RAIDOUTSTANDING;
    942 
    943 			raidinit(raidPtr);
    944 			rf_markalldirty(raidPtr);
    945 		}
    946 		/* free the buffers.  No return code here. */
    947 		if (k_cfg->layoutSpecificSize) {
    948 			RF_Free(specific_buf, k_cfg->layoutSpecificSize);
    949 		}
    950 		RF_Free(k_cfg, sizeof(RF_Config_t));
    951 
    952 		return (retcode);
    953 
    954 		/* shutdown the system */
    955 	case RAIDFRAME_SHUTDOWN:
    956 
    957 		if ((error = raidlock(rs)) != 0)
    958 			return (error);
    959 
    960 		/*
    961 		 * If somebody has a partition mounted, we shouldn't
    962 		 * shutdown.
    963 		 */
    964 
    965 		part = DISKPART(dev);
    966 		pmask = (1 << part);
    967 		if ((rs->sc_dkdev.dk_openmask & ~pmask) ||
    968 		    ((rs->sc_dkdev.dk_bopenmask & pmask) &&
    969 			(rs->sc_dkdev.dk_copenmask & pmask))) {
    970 			raidunlock(rs);
    971 			return (EBUSY);
    972 		}
    973 
    974 		retcode = rf_Shutdown(raidPtr);
    975 
    976 		/* It's no longer initialized... */
    977 		rs->sc_flags &= ~RAIDF_INITED;
    978 
    979 		/* Detach the disk. */
    980 		disk_detach(&rs->sc_dkdev);
    981 
    982 		raidunlock(rs);
    983 
    984 		return (retcode);
    985 	case RAIDFRAME_GET_COMPONENT_LABEL:
    986 		clabel_ptr = (RF_ComponentLabel_t **) data;
    987 		/* need to read the component label for the disk indicated
    988 		   by row,column in clabel */
    989 
    990 		/* For practice, let's get it directly from disk, rather
    991 		   than from the in-core copy */
    992 		RF_Malloc( clabel, sizeof( RF_ComponentLabel_t ),
    993 			   (RF_ComponentLabel_t *));
    994 		if (clabel == NULL)
    995 			return (ENOMEM);
    996 
    997 		memset((char *) clabel, 0, sizeof(RF_ComponentLabel_t));
    998 
    999 		retcode = copyin( *clabel_ptr, clabel,
   1000 				  sizeof(RF_ComponentLabel_t));
   1001 
   1002 		if (retcode) {
   1003 			RF_Free( clabel, sizeof(RF_ComponentLabel_t));
   1004 			return(retcode);
   1005 		}
   1006 
   1007 		row = clabel->row;
   1008 		column = clabel->column;
   1009 
   1010 		if ((row < 0) || (row >= raidPtr->numRow) ||
   1011 		    (column < 0) || (column >= raidPtr->numCol +
   1012 				     raidPtr->numSpare)) {
   1013 			RF_Free( clabel, sizeof(RF_ComponentLabel_t));
   1014 			return(EINVAL);
   1015 		}
   1016 
   1017 		raidread_component_label(raidPtr->Disks[row][column].dev,
   1018 				raidPtr->raid_cinfo[row][column].ci_vp,
   1019 				clabel );
   1020 
   1021 		retcode = copyout((caddr_t) clabel,
   1022 				  (caddr_t) *clabel_ptr,
   1023 				  sizeof(RF_ComponentLabel_t));
   1024 		RF_Free( clabel, sizeof(RF_ComponentLabel_t));
   1025 		return (retcode);
   1026 
   1027 	case RAIDFRAME_SET_COMPONENT_LABEL:
   1028 		clabel = (RF_ComponentLabel_t *) data;
   1029 
   1030 		/* XXX check the label for valid stuff... */
   1031 		/* Note that some things *should not* get modified --
   1032 		   the user should be re-initing the labels instead of
   1033 		   trying to patch things.
   1034 		   */
   1035 
   1036 		raidid = raidPtr->raidid;
   1037 		printf("raid%d: Got component label:\n", raidid);
   1038 		printf("raid%d: Version: %d\n", raidid, clabel->version);
   1039 		printf("raid%d: Serial Number: %d\n", raidid, clabel->serial_number);
   1040 		printf("raid%d: Mod counter: %d\n", raidid, clabel->mod_counter);
   1041 		printf("raid%d: Row: %d\n", raidid, clabel->row);
   1042 		printf("raid%d: Column: %d\n", raidid, clabel->column);
   1043 		printf("raid%d: Num Rows: %d\n", raidid, clabel->num_rows);
   1044 		printf("raid%d: Num Columns: %d\n", raidid, clabel->num_columns);
   1045 		printf("raid%d: Clean: %d\n", raidid, clabel->clean);
   1046 		printf("raid%d: Status: %d\n", raidid, clabel->status);
   1047 
   1048 		row = clabel->row;
   1049 		column = clabel->column;
   1050 
   1051 		if ((row < 0) || (row >= raidPtr->numRow) ||
   1052 		    (column < 0) || (column >= raidPtr->numCol)) {
   1053 			return(EINVAL);
   1054 		}
   1055 
   1056 		/* XXX this isn't allowed to do anything for now :-) */
   1057 
   1058 		/* XXX and before it is, we need to fill in the rest
   1059 		   of the fields!?!?!?! */
   1060 #if 0
   1061 		raidwrite_component_label(
   1062                             raidPtr->Disks[row][column].dev,
   1063 			    raidPtr->raid_cinfo[row][column].ci_vp,
   1064 			    clabel );
   1065 #endif
   1066 		return (0);
   1067 
   1068 	case RAIDFRAME_INIT_LABELS:
   1069 		clabel = (RF_ComponentLabel_t *) data;
   1070 		/*
   1071 		   we only want the serial number from
   1072 		   the above.  We get all the rest of the information
   1073 		   from the config that was used to create this RAID
   1074 		   set.
   1075 		   */
   1076 
   1077 		raidPtr->serial_number = clabel->serial_number;
   1078 
   1079 		raid_init_component_label(raidPtr, &ci_label);
   1080 		ci_label.serial_number = clabel->serial_number;
   1081 
   1082 		for(row=0;row<raidPtr->numRow;row++) {
   1083 			ci_label.row = row;
   1084 			for(column=0;column<raidPtr->numCol;column++) {
   1085 				diskPtr = &raidPtr->Disks[row][column];
   1086 				if (!RF_DEAD_DISK(diskPtr->status)) {
   1087 					ci_label.partitionSize = diskPtr->partitionSize;
   1088 					ci_label.column = column;
   1089 					raidwrite_component_label(
   1090 					  raidPtr->Disks[row][column].dev,
   1091 					  raidPtr->raid_cinfo[row][column].ci_vp,
   1092 					  &ci_label );
   1093 				}
   1094 			}
   1095 		}
   1096 
   1097 		return (retcode);
   1098 	case RAIDFRAME_SET_AUTOCONFIG:
   1099 		d = rf_set_autoconfig(raidPtr, *(int *) data);
   1100 		printf("raid%d: New autoconfig value is: %d\n",
   1101 		       raidPtr->raidid, d);
   1102 		*(int *) data = d;
   1103 		return (retcode);
   1104 
   1105 	case RAIDFRAME_SET_ROOT:
   1106 		d = rf_set_rootpartition(raidPtr, *(int *) data);
   1107 		printf("raid%d: New rootpartition value is: %d\n",
   1108 		       raidPtr->raidid, d);
   1109 		*(int *) data = d;
   1110 		return (retcode);
   1111 
   1112 		/* initialize all parity */
   1113 	case RAIDFRAME_REWRITEPARITY:
   1114 
   1115 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1116 			/* Parity for RAID 0 is trivially correct */
   1117 			raidPtr->parity_good = RF_RAID_CLEAN;
   1118 			return(0);
   1119 		}
   1120 
   1121 		if (raidPtr->parity_rewrite_in_progress == 1) {
   1122 			/* Re-write is already in progress! */
   1123 			return(EINVAL);
   1124 		}
   1125 
   1126 		retcode = RF_CREATE_THREAD(raidPtr->parity_rewrite_thread,
   1127 					   rf_RewriteParityThread,
   1128 					   raidPtr,"raid_parity");
   1129 		return (retcode);
   1130 
   1131 
   1132 	case RAIDFRAME_ADD_HOT_SPARE:
   1133 		sparePtr = (RF_SingleComponent_t *) data;
   1134 		memcpy( &hot_spare, sparePtr, sizeof(RF_SingleComponent_t));
   1135 		retcode = rf_add_hot_spare(raidPtr, &hot_spare);
   1136 		return(retcode);
   1137 
   1138 	case RAIDFRAME_REMOVE_HOT_SPARE:
   1139 		return(retcode);
   1140 
   1141 	case RAIDFRAME_DELETE_COMPONENT:
   1142 		componentPtr = (RF_SingleComponent_t *)data;
   1143 		memcpy( &component, componentPtr,
   1144 			sizeof(RF_SingleComponent_t));
   1145 		retcode = rf_delete_component(raidPtr, &component);
   1146 		return(retcode);
   1147 
   1148 	case RAIDFRAME_INCORPORATE_HOT_SPARE:
   1149 		componentPtr = (RF_SingleComponent_t *)data;
   1150 		memcpy( &component, componentPtr,
   1151 			sizeof(RF_SingleComponent_t));
   1152 		retcode = rf_incorporate_hot_spare(raidPtr, &component);
   1153 		return(retcode);
   1154 
   1155 	case RAIDFRAME_REBUILD_IN_PLACE:
   1156 
   1157 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1158 			/* Can't do this on a RAID 0!! */
   1159 			return(EINVAL);
   1160 		}
   1161 
   1162 		if (raidPtr->recon_in_progress == 1) {
   1163 			/* a reconstruct is already in progress! */
   1164 			return(EINVAL);
   1165 		}
   1166 
   1167 		componentPtr = (RF_SingleComponent_t *) data;
   1168 		memcpy( &component, componentPtr,
   1169 			sizeof(RF_SingleComponent_t));
   1170 		row = component.row;
   1171 		column = component.column;
   1172 		printf("raid%d: Rebuild: %d %d\n", raidPtr->raidid,
   1173 		       row, column);
   1174 		if ((row < 0) || (row >= raidPtr->numRow) ||
   1175 		    (column < 0) || (column >= raidPtr->numCol)) {
   1176 			return(EINVAL);
   1177 		}
   1178 
   1179 		RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
   1180 		if (rrcopy == NULL)
   1181 			return(ENOMEM);
   1182 
   1183 		rrcopy->raidPtr = (void *) raidPtr;
   1184 		rrcopy->row = row;
   1185 		rrcopy->col = column;
   1186 
   1187 		retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
   1188 					   rf_ReconstructInPlaceThread,
   1189 					   rrcopy,"raid_reconip");
   1190 		return(retcode);
   1191 
   1192 	case RAIDFRAME_GET_INFO:
   1193 		if (!raidPtr->valid)
   1194 			return (ENODEV);
   1195 		ucfgp = (RF_DeviceConfig_t **) data;
   1196 		RF_Malloc(d_cfg, sizeof(RF_DeviceConfig_t),
   1197 			  (RF_DeviceConfig_t *));
   1198 		if (d_cfg == NULL)
   1199 			return (ENOMEM);
   1200 		memset((char *) d_cfg, 0, sizeof(RF_DeviceConfig_t));
   1201 		d_cfg->rows = raidPtr->numRow;
   1202 		d_cfg->cols = raidPtr->numCol;
   1203 		d_cfg->ndevs = raidPtr->numRow * raidPtr->numCol;
   1204 		if (d_cfg->ndevs >= RF_MAX_DISKS) {
   1205 			RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
   1206 			return (ENOMEM);
   1207 		}
   1208 		d_cfg->nspares = raidPtr->numSpare;
   1209 		if (d_cfg->nspares >= RF_MAX_DISKS) {
   1210 			RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
   1211 			return (ENOMEM);
   1212 		}
   1213 		d_cfg->maxqdepth = raidPtr->maxQueueDepth;
   1214 		d = 0;
   1215 		for (i = 0; i < d_cfg->rows; i++) {
   1216 			for (j = 0; j < d_cfg->cols; j++) {
   1217 				d_cfg->devs[d] = raidPtr->Disks[i][j];
   1218 				d++;
   1219 			}
   1220 		}
   1221 		for (j = d_cfg->cols, i = 0; i < d_cfg->nspares; i++, j++) {
   1222 			d_cfg->spares[i] = raidPtr->Disks[0][j];
   1223 		}
   1224 		retcode = copyout((caddr_t) d_cfg, (caddr_t) * ucfgp,
   1225 				  sizeof(RF_DeviceConfig_t));
   1226 		RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
   1227 
   1228 		return (retcode);
   1229 
   1230 	case RAIDFRAME_CHECK_PARITY:
   1231 		*(int *) data = raidPtr->parity_good;
   1232 		return (0);
   1233 
   1234 	case RAIDFRAME_RESET_ACCTOTALS:
   1235 		memset(&raidPtr->acc_totals, 0, sizeof(raidPtr->acc_totals));
   1236 		return (0);
   1237 
   1238 	case RAIDFRAME_GET_ACCTOTALS:
   1239 		totals = (RF_AccTotals_t *) data;
   1240 		*totals = raidPtr->acc_totals;
   1241 		return (0);
   1242 
   1243 	case RAIDFRAME_KEEP_ACCTOTALS:
   1244 		raidPtr->keep_acc_totals = *(int *)data;
   1245 		return (0);
   1246 
   1247 	case RAIDFRAME_GET_SIZE:
   1248 		*(int *) data = raidPtr->totalSectors;
   1249 		return (0);
   1250 
   1251 		/* fail a disk & optionally start reconstruction */
   1252 	case RAIDFRAME_FAIL_DISK:
   1253 
   1254 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1255 			/* Can't do this on a RAID 0!! */
   1256 			return(EINVAL);
   1257 		}
   1258 
   1259 		rr = (struct rf_recon_req *) data;
   1260 
   1261 		if (rr->row < 0 || rr->row >= raidPtr->numRow
   1262 		    || rr->col < 0 || rr->col >= raidPtr->numCol)
   1263 			return (EINVAL);
   1264 
   1265 		printf("raid%d: Failing the disk: row: %d col: %d\n",
   1266 		       unit, rr->row, rr->col);
   1267 
   1268 		/* make a copy of the recon request so that we don't rely on
   1269 		 * the user's buffer */
   1270 		RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
   1271 		if (rrcopy == NULL)
   1272 			return(ENOMEM);
   1273 		memcpy(rrcopy, rr, sizeof(*rr));
   1274 		rrcopy->raidPtr = (void *) raidPtr;
   1275 
   1276 		retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
   1277 					   rf_ReconThread,
   1278 					   rrcopy,"raid_recon");
   1279 		return (0);
   1280 
   1281 		/* invoke a copyback operation after recon on whatever disk
   1282 		 * needs it, if any */
   1283 	case RAIDFRAME_COPYBACK:
   1284 
   1285 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1286 			/* This makes no sense on a RAID 0!! */
   1287 			return(EINVAL);
   1288 		}
   1289 
   1290 		if (raidPtr->copyback_in_progress == 1) {
   1291 			/* Copyback is already in progress! */
   1292 			return(EINVAL);
   1293 		}
   1294 
   1295 		retcode = RF_CREATE_THREAD(raidPtr->copyback_thread,
   1296 					   rf_CopybackThread,
   1297 					   raidPtr,"raid_copyback");
   1298 		return (retcode);
   1299 
   1300 		/* return the percentage completion of reconstruction */
   1301 	case RAIDFRAME_CHECK_RECON_STATUS:
   1302 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1303 			/* This makes no sense on a RAID 0, so tell the
   1304 			   user it's done. */
   1305 			*(int *) data = 100;
   1306 			return(0);
   1307 		}
   1308 		row = 0; /* XXX we only consider a single row... */
   1309 		if (raidPtr->status[row] != rf_rs_reconstructing)
   1310 			*(int *) data = 100;
   1311 		else
   1312 			*(int *) data = raidPtr->reconControl[row]->percentComplete;
   1313 		return (0);
   1314 	case RAIDFRAME_CHECK_RECON_STATUS_EXT:
   1315 		progressInfoPtr = (RF_ProgressInfo_t **) data;
   1316 		row = 0; /* XXX we only consider a single row... */
   1317 		if (raidPtr->status[row] != rf_rs_reconstructing) {
   1318 			progressInfo.remaining = 0;
   1319 			progressInfo.completed = 100;
   1320 			progressInfo.total = 100;
   1321 		} else {
   1322 			progressInfo.total =
   1323 				raidPtr->reconControl[row]->numRUsTotal;
   1324 			progressInfo.completed =
   1325 				raidPtr->reconControl[row]->numRUsComplete;
   1326 			progressInfo.remaining = progressInfo.total -
   1327 				progressInfo.completed;
   1328 		}
   1329 		retcode = copyout((caddr_t) &progressInfo,
   1330 				  (caddr_t) *progressInfoPtr,
   1331 				  sizeof(RF_ProgressInfo_t));
   1332 		return (retcode);
   1333 
   1334 	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
   1335 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1336 			/* This makes no sense on a RAID 0, so tell the
   1337 			   user it's done. */
   1338 			*(int *) data = 100;
   1339 			return(0);
   1340 		}
   1341 		if (raidPtr->parity_rewrite_in_progress == 1) {
   1342 			*(int *) data = 100 *
   1343 				raidPtr->parity_rewrite_stripes_done /
   1344 				raidPtr->Layout.numStripe;
   1345 		} else {
   1346 			*(int *) data = 100;
   1347 		}
   1348 		return (0);
   1349 
   1350 	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
   1351 		progressInfoPtr = (RF_ProgressInfo_t **) data;
   1352 		if (raidPtr->parity_rewrite_in_progress == 1) {
   1353 			progressInfo.total = raidPtr->Layout.numStripe;
   1354 			progressInfo.completed =
   1355 				raidPtr->parity_rewrite_stripes_done;
   1356 			progressInfo.remaining = progressInfo.total -
   1357 				progressInfo.completed;
   1358 		} else {
   1359 			progressInfo.remaining = 0;
   1360 			progressInfo.completed = 100;
   1361 			progressInfo.total = 100;
   1362 		}
   1363 		retcode = copyout((caddr_t) &progressInfo,
   1364 				  (caddr_t) *progressInfoPtr,
   1365 				  sizeof(RF_ProgressInfo_t));
   1366 		return (retcode);
   1367 
   1368 	case RAIDFRAME_CHECK_COPYBACK_STATUS:
   1369 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1370 			/* This makes no sense on a RAID 0 */
   1371 			*(int *) data = 100;
   1372 			return(0);
   1373 		}
   1374 		if (raidPtr->copyback_in_progress == 1) {
   1375 			*(int *) data = 100 * raidPtr->copyback_stripes_done /
   1376 				raidPtr->Layout.numStripe;
   1377 		} else {
   1378 			*(int *) data = 100;
   1379 		}
   1380 		return (0);
   1381 
   1382 	case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
   1383 		progressInfoPtr = (RF_ProgressInfo_t **) data;
   1384 		if (raidPtr->copyback_in_progress == 1) {
   1385 			progressInfo.total = raidPtr->Layout.numStripe;
   1386 			progressInfo.completed =
   1387 				raidPtr->copyback_stripes_done;
   1388 			progressInfo.remaining = progressInfo.total -
   1389 				progressInfo.completed;
   1390 		} else {
   1391 			progressInfo.remaining = 0;
   1392 			progressInfo.completed = 100;
   1393 			progressInfo.total = 100;
   1394 		}
   1395 		retcode = copyout((caddr_t) &progressInfo,
   1396 				  (caddr_t) *progressInfoPtr,
   1397 				  sizeof(RF_ProgressInfo_t));
   1398 		return (retcode);
   1399 
   1400 		/* the sparetable daemon calls this to wait for the kernel to
   1401 		 * need a spare table. this ioctl does not return until a
   1402 		 * spare table is needed. XXX -- calling mpsleep here in the
   1403 		 * ioctl code is almost certainly wrong and evil. -- XXX XXX
   1404 		 * -- I should either compute the spare table in the kernel,
   1405 		 * or have a different -- XXX XXX -- interface (a different
   1406 		 * character device) for delivering the table     -- XXX */
   1407 #if 0
   1408 	case RAIDFRAME_SPARET_WAIT:
   1409 		RF_LOCK_MUTEX(rf_sparet_wait_mutex);
   1410 		while (!rf_sparet_wait_queue)
   1411 			mpsleep(&rf_sparet_wait_queue, (PZERO + 1) | PCATCH, "sparet wait", 0, (void *) simple_lock_addr(rf_sparet_wait_mutex), MS_LOCK_SIMPLE);
   1412 		waitreq = rf_sparet_wait_queue;
   1413 		rf_sparet_wait_queue = rf_sparet_wait_queue->next;
   1414 		RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
   1415 
   1416 		/* structure assignment */
   1417 		*((RF_SparetWait_t *) data) = *waitreq;
   1418 
   1419 		RF_Free(waitreq, sizeof(*waitreq));
   1420 		return (0);
   1421 
   1422 		/* wakes up a process waiting on SPARET_WAIT and puts an error
   1423 		 * code in it that will cause the daemon to exit */
   1424 	case RAIDFRAME_ABORT_SPARET_WAIT:
   1425 		RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
   1426 		waitreq->fcol = -1;
   1427 		RF_LOCK_MUTEX(rf_sparet_wait_mutex);
   1428 		waitreq->next = rf_sparet_wait_queue;
   1429 		rf_sparet_wait_queue = waitreq;
   1430 		RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
   1431 		wakeup(&rf_sparet_wait_queue);
   1432 		return (0);
   1433 
   1434 		/* used by the spare table daemon to deliver a spare table
   1435 		 * into the kernel */
   1436 	case RAIDFRAME_SEND_SPARET:
   1437 
   1438 		/* install the spare table */
   1439 		retcode = rf_SetSpareTable(raidPtr, *(void **) data);
   1440 
   1441 		/* respond to the requestor.  the return status of the spare
   1442 		 * table installation is passed in the "fcol" field */
   1443 		RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
   1444 		waitreq->fcol = retcode;
   1445 		RF_LOCK_MUTEX(rf_sparet_wait_mutex);
   1446 		waitreq->next = rf_sparet_resp_queue;
   1447 		rf_sparet_resp_queue = waitreq;
   1448 		wakeup(&rf_sparet_resp_queue);
   1449 		RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
   1450 
   1451 		return (retcode);
   1452 #endif
   1453 
   1454 	default:
   1455 		break; /* fall through to the os-specific code below */
   1456 
   1457 	}
   1458 
   1459 	if (!raidPtr->valid)
   1460 		return (EINVAL);
   1461 
   1462 	/*
   1463 	 * Add support for "regular" device ioctls here.
   1464 	 */
   1465 
   1466 	switch (cmd) {
   1467 	case DIOCGDINFO:
   1468 		*(struct disklabel *) data = *(rs->sc_dkdev.dk_label);
   1469 		break;
   1470 #ifdef __HAVE_OLD_DISKLABEL
   1471 	case ODIOCGDINFO:
   1472 		newlabel = *(rs->sc_dkdev.dk_label);
   1473 		if (newlabel.d_npartitions > OLDMAXPARTITIONS)
   1474 			return ENOTTY;
   1475 		memcpy(data, &newlabel, sizeof (struct olddisklabel));
   1476 		break;
   1477 #endif
   1478 
   1479 	case DIOCGPART:
   1480 		((struct partinfo *) data)->disklab = rs->sc_dkdev.dk_label;
   1481 		((struct partinfo *) data)->part =
   1482 		    &rs->sc_dkdev.dk_label->d_partitions[DISKPART(dev)];
   1483 		break;
   1484 
   1485 	case DIOCWDINFO:
   1486 	case DIOCSDINFO:
   1487 #ifdef __HAVE_OLD_DISKLABEL
   1488 	case ODIOCWDINFO:
   1489 	case ODIOCSDINFO:
   1490 #endif
   1491 	{
   1492 		struct disklabel *lp;
   1493 #ifdef __HAVE_OLD_DISKLABEL
   1494 		if (cmd == ODIOCSDINFO || cmd == ODIOCWDINFO) {
   1495 			memset(&newlabel, 0, sizeof newlabel);
   1496 			memcpy(&newlabel, data, sizeof (struct olddisklabel));
   1497 			lp = &newlabel;
   1498 		} else
   1499 #endif
   1500 		lp = (struct disklabel *)data;
   1501 
   1502 		if ((error = raidlock(rs)) != 0)
   1503 			return (error);
   1504 
   1505 		rs->sc_flags |= RAIDF_LABELLING;
   1506 
   1507 		error = setdisklabel(rs->sc_dkdev.dk_label,
   1508 		    lp, 0, rs->sc_dkdev.dk_cpulabel);
   1509 		if (error == 0) {
   1510 			if (cmd == DIOCWDINFO
   1511 #ifdef __HAVE_OLD_DISKLABEL
   1512 			    || cmd == ODIOCWDINFO
   1513 #endif
   1514 			   )
   1515 				error = writedisklabel(RAIDLABELDEV(dev),
   1516 				    raidstrategy, rs->sc_dkdev.dk_label,
   1517 				    rs->sc_dkdev.dk_cpulabel);
   1518 		}
   1519 		rs->sc_flags &= ~RAIDF_LABELLING;
   1520 
   1521 		raidunlock(rs);
   1522 
   1523 		if (error)
   1524 			return (error);
   1525 		break;
   1526 	}
   1527 
   1528 	case DIOCWLABEL:
   1529 		if (*(int *) data != 0)
   1530 			rs->sc_flags |= RAIDF_WLABEL;
   1531 		else
   1532 			rs->sc_flags &= ~RAIDF_WLABEL;
   1533 		break;
   1534 
   1535 	case DIOCGDEFLABEL:
   1536 		raidgetdefaultlabel(raidPtr, rs, (struct disklabel *) data);
   1537 		break;
   1538 
   1539 #ifdef __HAVE_OLD_DISKLABEL
   1540 	case ODIOCGDEFLABEL:
   1541 		raidgetdefaultlabel(raidPtr, rs, &newlabel);
   1542 		if (newlabel.d_npartitions > OLDMAXPARTITIONS)
   1543 			return ENOTTY;
   1544 		memcpy(data, &newlabel, sizeof (struct olddisklabel));
   1545 		break;
   1546 #endif
   1547 
   1548 	default:
   1549 		retcode = ENOTTY;
   1550 	}
   1551 	return (retcode);
   1552 
   1553 }
   1554 
   1555 
   1556 /* raidinit -- complete the rest of the initialization for the
   1557    RAIDframe device.  */
   1558 
   1559 
   1560 static void
   1561 raidinit(raidPtr)
   1562 	RF_Raid_t *raidPtr;
   1563 {
   1564 	struct raid_softc *rs;
   1565 	int     unit;
   1566 
   1567 	unit = raidPtr->raidid;
   1568 
   1569 	rs = &raid_softc[unit];
   1570 
   1571 	/* XXX should check return code first... */
   1572 	rs->sc_flags |= RAIDF_INITED;
   1573 
   1574 	sprintf(rs->sc_xname, "raid%d", unit);	/* XXX doesn't check bounds. */
   1575 
   1576 	rs->sc_dkdev.dk_name = rs->sc_xname;
   1577 
   1578 	/* disk_attach actually creates space for the CPU disklabel, among
   1579 	 * other things, so it's critical to call this *BEFORE* we try putzing
   1580 	 * with disklabels. */
   1581 
   1582 	disk_attach(&rs->sc_dkdev);
   1583 
   1584 	/* XXX There may be a weird interaction here between this, and
   1585 	 * protectedSectors, as used in RAIDframe.  */
   1586 
   1587 	rs->sc_size = raidPtr->totalSectors;
   1588 
   1589 }
   1590 
   1591 /* wake up the daemon & tell it to get us a spare table
   1592  * XXX
   1593  * the entries in the queues should be tagged with the raidPtr
   1594  * so that in the extremely rare case that two recons happen at once,
   1595  * we know for which device we're requesting a spare table
   1596  * XXX
   1597  *
   1598  * XXX This code is not currently used. GO
   1599  */
   1600 int
   1601 rf_GetSpareTableFromDaemon(req)
   1602 	RF_SparetWait_t *req;
   1603 {
   1604 	int     retcode;
   1605 
   1606 	RF_LOCK_MUTEX(rf_sparet_wait_mutex);
   1607 	req->next = rf_sparet_wait_queue;
   1608 	rf_sparet_wait_queue = req;
   1609 	wakeup(&rf_sparet_wait_queue);
   1610 
   1611 	/* mpsleep unlocks the mutex */
   1612 	while (!rf_sparet_resp_queue) {
   1613 		tsleep(&rf_sparet_resp_queue, PRIBIO,
   1614 		    "raidframe getsparetable", 0);
   1615 	}
   1616 	req = rf_sparet_resp_queue;
   1617 	rf_sparet_resp_queue = req->next;
   1618 	RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
   1619 
   1620 	retcode = req->fcol;
   1621 	RF_Free(req, sizeof(*req));	/* this is not the same req as we
   1622 					 * alloc'd */
   1623 	return (retcode);
   1624 }
   1625 
   1626 /* a wrapper around rf_DoAccess that extracts appropriate info from the
   1627  * bp & passes it down.
   1628  * any calls originating in the kernel must use non-blocking I/O
   1629  * do some extra sanity checking to return "appropriate" error values for
   1630  * certain conditions (to make some standard utilities work)
   1631  *
   1632  * Formerly known as: rf_DoAccessKernel
   1633  */
   1634 void
   1635 raidstart(raidPtr)
   1636 	RF_Raid_t *raidPtr;
   1637 {
   1638 	RF_SectorCount_t num_blocks, pb, sum;
   1639 	RF_RaidAddr_t raid_addr;
   1640 	int     retcode;
   1641 	struct partition *pp;
   1642 	daddr_t blocknum;
   1643 	int     unit;
   1644 	struct raid_softc *rs;
   1645 	int     do_async;
   1646 	struct buf *bp;
   1647 
   1648 	unit = raidPtr->raidid;
   1649 	rs = &raid_softc[unit];
   1650 
   1651 	/* quick check to see if anything has died recently */
   1652 	RF_LOCK_MUTEX(raidPtr->mutex);
   1653 	if (raidPtr->numNewFailures > 0) {
   1654 		rf_update_component_labels(raidPtr,
   1655 					   RF_NORMAL_COMPONENT_UPDATE);
   1656 		raidPtr->numNewFailures--;
   1657 	}
   1658 
   1659 	/* Check to see if we're at the limit... */
   1660 	while (raidPtr->openings > 0) {
   1661 		RF_UNLOCK_MUTEX(raidPtr->mutex);
   1662 
   1663 		/* get the next item, if any, from the queue */
   1664 		if ((bp = BUFQ_GET(&rs->buf_queue)) == NULL) {
   1665 			/* nothing more to do */
   1666 			return;
   1667 		}
   1668 
   1669 		/* Ok, for the bp we have here, bp->b_blkno is relative to the
   1670 		 * partition.. Need to make it absolute to the underlying
   1671 		 * device.. */
   1672 
   1673 		blocknum = bp->b_blkno;
   1674 		if (DISKPART(bp->b_dev) != RAW_PART) {
   1675 			pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)];
   1676 			blocknum += pp->p_offset;
   1677 		}
   1678 
   1679 		db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno,
   1680 			    (int) blocknum));
   1681 
   1682 		db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
   1683 		db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));
   1684 
   1685 		/* *THIS* is where we adjust what block we're going to...
   1686 		 * but DO NOT TOUCH bp->b_blkno!!! */
   1687 		raid_addr = blocknum;
   1688 
   1689 		num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
   1690 		pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
   1691 		sum = raid_addr + num_blocks + pb;
   1692 		if (1 || rf_debugKernelAccess) {
   1693 			db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
   1694 				    (int) raid_addr, (int) sum, (int) num_blocks,
   1695 				    (int) pb, (int) bp->b_resid));
   1696 		}
   1697 		if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
   1698 		    || (sum < num_blocks) || (sum < pb)) {
   1699 			bp->b_error = ENOSPC;
   1700 			bp->b_flags |= B_ERROR;
   1701 			bp->b_resid = bp->b_bcount;
   1702 			biodone(bp);
   1703 			RF_LOCK_MUTEX(raidPtr->mutex);
   1704 			continue;
   1705 		}
   1706 		/*
   1707 		 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
   1708 		 */
   1709 
   1710 		if (bp->b_bcount & raidPtr->sectorMask) {
   1711 			bp->b_error = EINVAL;
   1712 			bp->b_flags |= B_ERROR;
   1713 			bp->b_resid = bp->b_bcount;
   1714 			biodone(bp);
   1715 			RF_LOCK_MUTEX(raidPtr->mutex);
   1716 			continue;
   1717 
   1718 		}
   1719 		db1_printf(("Calling DoAccess..\n"));
   1720 
   1721 
   1722 		RF_LOCK_MUTEX(raidPtr->mutex);
   1723 		raidPtr->openings--;
   1724 		RF_UNLOCK_MUTEX(raidPtr->mutex);
   1725 
   1726 		/*
   1727 		 * Everything is async.
   1728 		 */
   1729 		do_async = 1;
   1730 
   1731 		disk_busy(&rs->sc_dkdev);
   1732 
   1733 		/* XXX we're still at splbio() here... do we *really*
   1734 		   need to be? */
   1735 
   1736 		/* don't ever condition on bp->b_flags & B_WRITE.
   1737 		 * always condition on B_READ instead */
   1738 
   1739 		retcode = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
   1740 				      RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
   1741 				      do_async, raid_addr, num_blocks,
   1742 				      bp->b_data, bp, RF_DAG_NONBLOCKING_IO);
   1743 
   1744 		RF_LOCK_MUTEX(raidPtr->mutex);
   1745 	}
   1746 	RF_UNLOCK_MUTEX(raidPtr->mutex);
   1747 }
   1748 
   1749 
   1750 
   1751 
   1752 /* invoke an I/O from kernel mode.  Disk queue should be locked upon entry */
   1753 
   1754 int
   1755 rf_DispatchKernelIO(queue, req)
   1756 	RF_DiskQueue_t *queue;
   1757 	RF_DiskQueueData_t *req;
   1758 {
   1759 	int     op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
   1760 	struct buf *bp;
   1761 	struct raidbuf *raidbp = NULL;
   1762 	struct raid_softc *rs;
   1763 	int     unit;
   1764 	int s;
   1765 
   1766 	s=0;
   1767 	/* s = splbio();*/ /* want to test this */
   1768 	/* XXX along with the vnode, we also need the softc associated with
   1769 	 * this device.. */
   1770 
   1771 	req->queue = queue;
   1772 
   1773 	unit = queue->raidPtr->raidid;
   1774 
   1775 	db1_printf(("DispatchKernelIO unit: %d\n", unit));
   1776 
   1777 	if (unit >= numraid) {
   1778 		printf("Invalid unit number: %d %d\n", unit, numraid);
   1779 		panic("Invalid Unit number in rf_DispatchKernelIO\n");
   1780 	}
   1781 	rs = &raid_softc[unit];
   1782 
   1783 	bp = req->bp;
   1784 #if 1
   1785 	/* XXX when there is a physical disk failure, someone is passing us a
   1786 	 * buffer that contains old stuff!!  Attempt to deal with this problem
   1787 	 * without taking a performance hit... (not sure where the real bug
   1788 	 * is.  It's buried in RAIDframe somewhere) :-(  GO ) */
   1789 
   1790 	if (bp->b_flags & B_ERROR) {
   1791 		bp->b_flags &= ~B_ERROR;
   1792 	}
   1793 	if (bp->b_error != 0) {
   1794 		bp->b_error = 0;
   1795 	}
   1796 #endif
   1797 	raidbp = RAIDGETBUF(rs);
   1798 
   1799 	raidbp->rf_flags = 0;	/* XXX not really used anywhere... */
   1800 
   1801 	/*
   1802 	 * context for raidiodone
   1803 	 */
   1804 	raidbp->rf_obp = bp;
   1805 	raidbp->req = req;
   1806 
   1807 	LIST_INIT(&raidbp->rf_buf.b_dep);
   1808 
   1809 	switch (req->type) {
   1810 	case RF_IO_TYPE_NOP:	/* used primarily to unlock a locked queue */
   1811 		/* XXX need to do something extra here.. */
   1812 		/* I'm leaving this in, as I've never actually seen it used,
   1813 		 * and I'd like folks to report it... GO */
   1814 		printf(("WAKEUP CALLED\n"));
   1815 		queue->numOutstanding++;
   1816 
   1817 		/* XXX need to glue the original buffer into this??  */
   1818 
   1819 		KernelWakeupFunc(&raidbp->rf_buf);
   1820 		break;
   1821 
   1822 	case RF_IO_TYPE_READ:
   1823 	case RF_IO_TYPE_WRITE:
   1824 
   1825 		if (req->tracerec) {
   1826 			RF_ETIMER_START(req->tracerec->timer);
   1827 		}
   1828 		InitBP(&raidbp->rf_buf, queue->rf_cinfo->ci_vp,
   1829 		    op | bp->b_flags, queue->rf_cinfo->ci_dev,
   1830 		    req->sectorOffset, req->numSector,
   1831 		    req->buf, KernelWakeupFunc, (void *) req,
   1832 		    queue->raidPtr->logBytesPerSector, req->b_proc);
   1833 
   1834 		if (rf_debugKernelAccess) {
   1835 			db1_printf(("dispatch: bp->b_blkno = %ld\n",
   1836 				(long) bp->b_blkno));
   1837 		}
   1838 		queue->numOutstanding++;
   1839 		queue->last_deq_sector = req->sectorOffset;
   1840 		/* acc wouldn't have been let in if there were any pending
   1841 		 * reqs at any other priority */
   1842 		queue->curPriority = req->priority;
   1843 
   1844 		db1_printf(("Going for %c to unit %d row %d col %d\n",
   1845 			req->type, unit, queue->row, queue->col));
   1846 		db1_printf(("sector %d count %d (%d bytes) %d\n",
   1847 			(int) req->sectorOffset, (int) req->numSector,
   1848 			(int) (req->numSector <<
   1849 			    queue->raidPtr->logBytesPerSector),
   1850 			(int) queue->raidPtr->logBytesPerSector));
   1851 		if ((raidbp->rf_buf.b_flags & B_READ) == 0) {
   1852 			raidbp->rf_buf.b_vp->v_numoutput++;
   1853 		}
   1854 		VOP_STRATEGY(&raidbp->rf_buf);
   1855 
   1856 		break;
   1857 
   1858 	default:
   1859 		panic("bad req->type in rf_DispatchKernelIO");
   1860 	}
   1861 	db1_printf(("Exiting from DispatchKernelIO\n"));
   1862 	/* splx(s); */ /* want to test this */
   1863 	return (0);
   1864 }
   1865 /* this is the callback function associated with a I/O invoked from
   1866    kernel code.
   1867  */
   1868 static void
   1869 KernelWakeupFunc(vbp)
   1870 	struct buf *vbp;
   1871 {
   1872 	RF_DiskQueueData_t *req = NULL;
   1873 	RF_DiskQueue_t *queue;
   1874 	struct raidbuf *raidbp = (struct raidbuf *) vbp;
   1875 	struct buf *bp;
   1876 	struct raid_softc *rs;
   1877 	int     unit;
   1878 	int s;
   1879 
   1880 	s = splbio();
   1881 	db1_printf(("recovering the request queue:\n"));
   1882 	req = raidbp->req;
   1883 
   1884 	bp = raidbp->rf_obp;
   1885 
   1886 	queue = (RF_DiskQueue_t *) req->queue;
   1887 
   1888 	if (raidbp->rf_buf.b_flags & B_ERROR) {
   1889 		bp->b_flags |= B_ERROR;
   1890 		bp->b_error = raidbp->rf_buf.b_error ?
   1891 		    raidbp->rf_buf.b_error : EIO;
   1892 	}
   1893 
   1894 	/* XXX methinks this could be wrong... */
   1895 #if 1
   1896 	bp->b_resid = raidbp->rf_buf.b_resid;
   1897 #endif
   1898 
   1899 	if (req->tracerec) {
   1900 		RF_ETIMER_STOP(req->tracerec->timer);
   1901 		RF_ETIMER_EVAL(req->tracerec->timer);
   1902 		RF_LOCK_MUTEX(rf_tracing_mutex);
   1903 		req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
   1904 		req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
   1905 		req->tracerec->num_phys_ios++;
   1906 		RF_UNLOCK_MUTEX(rf_tracing_mutex);
   1907 	}
   1908 	bp->b_bcount = raidbp->rf_buf.b_bcount;	/* XXXX ?? */
   1909 
   1910 	unit = queue->raidPtr->raidid;	/* *Much* simpler :-> */
   1911 
   1912 
   1913 	/* XXX Ok, let's get aggressive... If B_ERROR is set, let's go
   1914 	 * ballistic, and mark the component as hosed... */
   1915 
   1916 	if (bp->b_flags & B_ERROR) {
   1917 		/* Mark the disk as dead */
   1918 		/* but only mark it once... */
   1919 		if (queue->raidPtr->Disks[queue->row][queue->col].status ==
   1920 		    rf_ds_optimal) {
   1921 			printf("raid%d: IO Error.  Marking %s as failed.\n",
   1922 			    unit, queue->raidPtr->Disks[queue->row][queue->col].devname);
   1923 			queue->raidPtr->Disks[queue->row][queue->col].status =
   1924 			    rf_ds_failed;
   1925 			queue->raidPtr->status[queue->row] = rf_rs_degraded;
   1926 			queue->raidPtr->numFailures++;
   1927 			queue->raidPtr->numNewFailures++;
   1928 		} else {	/* Disk is already dead... */
   1929 			/* printf("Disk already marked as dead!\n"); */
   1930 		}
   1931 
   1932 	}
   1933 
   1934 	rs = &raid_softc[unit];
   1935 	RAIDPUTBUF(rs, raidbp);
   1936 
   1937 	rf_DiskIOComplete(queue, req, (bp->b_flags & B_ERROR) ? 1 : 0);
   1938 	(req->CompleteFunc) (req->argument, (bp->b_flags & B_ERROR) ? 1 : 0);
   1939 
   1940 	splx(s);
   1941 }
   1942 
   1943 
   1944 
   1945 /*
   1946  * initialize a buf structure for doing an I/O in the kernel.
   1947  */
   1948 static void
   1949 InitBP(bp, b_vp, rw_flag, dev, startSect, numSect, buf, cbFunc, cbArg,
   1950        logBytesPerSector, b_proc)
   1951 	struct buf *bp;
   1952 	struct vnode *b_vp;
   1953 	unsigned rw_flag;
   1954 	dev_t dev;
   1955 	RF_SectorNum_t startSect;
   1956 	RF_SectorCount_t numSect;
   1957 	caddr_t buf;
   1958 	void (*cbFunc) (struct buf *);
   1959 	void *cbArg;
   1960 	int logBytesPerSector;
   1961 	struct proc *b_proc;
   1962 {
   1963 	/* bp->b_flags       = B_PHYS | rw_flag; */
   1964 	bp->b_flags = B_CALL | rw_flag;	/* XXX need B_PHYS here too??? */
   1965 	bp->b_bcount = numSect << logBytesPerSector;
   1966 	bp->b_bufsize = bp->b_bcount;
   1967 	bp->b_error = 0;
   1968 	bp->b_dev = dev;
   1969 	bp->b_data = buf;
   1970 	bp->b_blkno = startSect;
   1971 	bp->b_resid = bp->b_bcount;	/* XXX is this right!??!?!! */
   1972 	if (bp->b_bcount == 0) {
   1973 		panic("bp->b_bcount is zero in InitBP!!\n");
   1974 	}
   1975 	bp->b_proc = b_proc;
   1976 	bp->b_iodone = cbFunc;
   1977 	bp->b_vp = b_vp;
   1978 
   1979 }
   1980 
   1981 static void
   1982 raidgetdefaultlabel(raidPtr, rs, lp)
   1983 	RF_Raid_t *raidPtr;
   1984 	struct raid_softc *rs;
   1985 	struct disklabel *lp;
   1986 {
   1987 	db1_printf(("Building a default label...\n"));
   1988 	memset(lp, 0, sizeof(*lp));
   1989 
   1990 	/* fabricate a label... */
   1991 	lp->d_secperunit = raidPtr->totalSectors;
   1992 	lp->d_secsize = raidPtr->bytesPerSector;
   1993 	lp->d_nsectors = raidPtr->Layout.dataSectorsPerStripe;
   1994 	lp->d_ntracks = 4 * raidPtr->numCol;
   1995 	lp->d_ncylinders = raidPtr->totalSectors /
   1996 		(lp->d_nsectors * lp->d_ntracks);
   1997 	lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
   1998 
   1999 	strncpy(lp->d_typename, "raid", sizeof(lp->d_typename));
   2000 	lp->d_type = DTYPE_RAID;
   2001 	strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
   2002 	lp->d_rpm = 3600;
   2003 	lp->d_interleave = 1;
   2004 	lp->d_flags = 0;
   2005 
   2006 	lp->d_partitions[RAW_PART].p_offset = 0;
   2007 	lp->d_partitions[RAW_PART].p_size = raidPtr->totalSectors;
   2008 	lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
   2009 	lp->d_npartitions = RAW_PART + 1;
   2010 
   2011 	lp->d_magic = DISKMAGIC;
   2012 	lp->d_magic2 = DISKMAGIC;
   2013 	lp->d_checksum = dkcksum(rs->sc_dkdev.dk_label);
   2014 
   2015 }
   2016 /*
   2017  * Read the disklabel from the raid device.  If one is not present, fake one
   2018  * up.
   2019  */
   2020 static void
   2021 raidgetdisklabel(dev)
   2022 	dev_t   dev;
   2023 {
   2024 	int     unit = raidunit(dev);
   2025 	struct raid_softc *rs = &raid_softc[unit];
   2026 	char   *errstring;
   2027 	struct disklabel *lp = rs->sc_dkdev.dk_label;
   2028 	struct cpu_disklabel *clp = rs->sc_dkdev.dk_cpulabel;
   2029 	RF_Raid_t *raidPtr;
   2030 
   2031 	db1_printf(("Getting the disklabel...\n"));
   2032 
   2033 	memset(clp, 0, sizeof(*clp));
   2034 
   2035 	raidPtr = raidPtrs[unit];
   2036 
   2037 	raidgetdefaultlabel(raidPtr, rs, lp);
   2038 
   2039 	/*
   2040 	 * Call the generic disklabel extraction routine.
   2041 	 */
   2042 	errstring = readdisklabel(RAIDLABELDEV(dev), raidstrategy,
   2043 	    rs->sc_dkdev.dk_label, rs->sc_dkdev.dk_cpulabel);
   2044 	if (errstring)
   2045 		raidmakedisklabel(rs);
   2046 	else {
   2047 		int     i;
   2048 		struct partition *pp;
   2049 
   2050 		/*
   2051 		 * Sanity check whether the found disklabel is valid.
   2052 		 *
   2053 		 * This is necessary since total size of the raid device
   2054 		 * may vary when an interleave is changed even though exactly
   2055 		 * same componets are used, and old disklabel may used
   2056 		 * if that is found.
   2057 		 */
   2058 		if (lp->d_secperunit != rs->sc_size)
   2059 			printf("raid%d: WARNING: %s: "
   2060 			    "total sector size in disklabel (%d) != "
   2061 			    "the size of raid (%ld)\n", unit, rs->sc_xname,
   2062 			    lp->d_secperunit, (long) rs->sc_size);
   2063 		for (i = 0; i < lp->d_npartitions; i++) {
   2064 			pp = &lp->d_partitions[i];
   2065 			if (pp->p_offset + pp->p_size > rs->sc_size)
   2066 				printf("raid%d: WARNING: %s: end of partition `%c' "
   2067 				       "exceeds the size of raid (%ld)\n",
   2068 				       unit, rs->sc_xname, 'a' + i, (long) rs->sc_size);
   2069 		}
   2070 	}
   2071 
   2072 }
   2073 /*
   2074  * Take care of things one might want to take care of in the event
   2075  * that a disklabel isn't present.
   2076  */
   2077 static void
   2078 raidmakedisklabel(rs)
   2079 	struct raid_softc *rs;
   2080 {
   2081 	struct disklabel *lp = rs->sc_dkdev.dk_label;
   2082 	db1_printf(("Making a label..\n"));
   2083 
   2084 	/*
   2085 	 * For historical reasons, if there's no disklabel present
   2086 	 * the raw partition must be marked FS_BSDFFS.
   2087 	 */
   2088 
   2089 	lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;
   2090 
   2091 	strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));
   2092 
   2093 	lp->d_checksum = dkcksum(lp);
   2094 }
   2095 /*
   2096  * Lookup the provided name in the filesystem.  If the file exists,
   2097  * is a valid block device, and isn't being used by anyone else,
   2098  * set *vpp to the file's vnode.
   2099  * You'll find the original of this in ccd.c
   2100  */
   2101 int
   2102 raidlookup(path, p, vpp)
   2103 	char   *path;
   2104 	struct proc *p;
   2105 	struct vnode **vpp;	/* result */
   2106 {
   2107 	struct nameidata nd;
   2108 	struct vnode *vp;
   2109 	struct vattr va;
   2110 	int     error;
   2111 
   2112 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, path, p);
   2113 	if ((error = vn_open(&nd, FREAD | FWRITE, 0)) != 0) {
   2114 #if 0
   2115 		printf("RAIDframe: vn_open returned %d\n", error);
   2116 #endif
   2117 		return (error);
   2118 	}
   2119 	vp = nd.ni_vp;
   2120 	if (vp->v_usecount > 1) {
   2121 		VOP_UNLOCK(vp, 0);
   2122 		(void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
   2123 		return (EBUSY);
   2124 	}
   2125 	if ((error = VOP_GETATTR(vp, &va, p->p_ucred, p)) != 0) {
   2126 		VOP_UNLOCK(vp, 0);
   2127 		(void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
   2128 		return (error);
   2129 	}
   2130 	/* XXX: eventually we should handle VREG, too. */
   2131 	if (va.va_type != VBLK) {
   2132 		VOP_UNLOCK(vp, 0);
   2133 		(void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
   2134 		return (ENOTBLK);
   2135 	}
   2136 	VOP_UNLOCK(vp, 0);
   2137 	*vpp = vp;
   2138 	return (0);
   2139 }
   2140 /*
   2141  * Wait interruptibly for an exclusive lock.
   2142  *
   2143  * XXX
   2144  * Several drivers do this; it should be abstracted and made MP-safe.
   2145  * (Hmm... where have we seen this warning before :->  GO )
   2146  */
   2147 static int
   2148 raidlock(rs)
   2149 	struct raid_softc *rs;
   2150 {
   2151 	int     error;
   2152 
   2153 	while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
   2154 		rs->sc_flags |= RAIDF_WANTED;
   2155 		if ((error =
   2156 			tsleep(rs, PRIBIO | PCATCH, "raidlck", 0)) != 0)
   2157 			return (error);
   2158 	}
   2159 	rs->sc_flags |= RAIDF_LOCKED;
   2160 	return (0);
   2161 }
   2162 /*
   2163  * Unlock and wake up any waiters.
   2164  */
   2165 static void
   2166 raidunlock(rs)
   2167 	struct raid_softc *rs;
   2168 {
   2169 
   2170 	rs->sc_flags &= ~RAIDF_LOCKED;
   2171 	if ((rs->sc_flags & RAIDF_WANTED) != 0) {
   2172 		rs->sc_flags &= ~RAIDF_WANTED;
   2173 		wakeup(rs);
   2174 	}
   2175 }
   2176 
   2177 
   2178 #define RF_COMPONENT_INFO_OFFSET  16384 /* bytes */
   2179 #define RF_COMPONENT_INFO_SIZE     1024 /* bytes */
   2180 
   2181 int
   2182 raidmarkclean(dev_t dev, struct vnode *b_vp, int mod_counter)
   2183 {
   2184 	RF_ComponentLabel_t clabel;
   2185 	raidread_component_label(dev, b_vp, &clabel);
   2186 	clabel.mod_counter = mod_counter;
   2187 	clabel.clean = RF_RAID_CLEAN;
   2188 	raidwrite_component_label(dev, b_vp, &clabel);
   2189 	return(0);
   2190 }
   2191 
   2192 
   2193 int
   2194 raidmarkdirty(dev_t dev, struct vnode *b_vp, int mod_counter)
   2195 {
   2196 	RF_ComponentLabel_t clabel;
   2197 	raidread_component_label(dev, b_vp, &clabel);
   2198 	clabel.mod_counter = mod_counter;
   2199 	clabel.clean = RF_RAID_DIRTY;
   2200 	raidwrite_component_label(dev, b_vp, &clabel);
   2201 	return(0);
   2202 }
   2203 
   2204 /* ARGSUSED */
   2205 int
   2206 raidread_component_label(dev, b_vp, clabel)
   2207 	dev_t dev;
   2208 	struct vnode *b_vp;
   2209 	RF_ComponentLabel_t *clabel;
   2210 {
   2211 	struct buf *bp;
   2212 	int error;
   2213 
   2214 	/* XXX should probably ensure that we don't try to do this if
   2215 	   someone has changed rf_protected_sectors. */
   2216 
   2217 	if (b_vp == NULL) {
   2218 		/* For whatever reason, this component is not valid.
   2219 		   Don't try to read a component label from it. */
   2220 		return(EINVAL);
   2221 	}
   2222 
   2223 	/* get a block of the appropriate size... */
   2224 	bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
   2225 	bp->b_dev = dev;
   2226 
   2227 	/* get our ducks in a row for the read */
   2228 	bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
   2229 	bp->b_bcount = RF_COMPONENT_INFO_SIZE;
   2230 	bp->b_flags |= B_READ;
   2231  	bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
   2232 
   2233 	(*bdevsw[major(bp->b_dev)].d_strategy)(bp);
   2234 
   2235 	error = biowait(bp);
   2236 
   2237 	if (!error) {
   2238 		memcpy(clabel, bp->b_data,
   2239 		       sizeof(RF_ComponentLabel_t));
   2240 #if 0
   2241 		rf_print_component_label( clabel );
   2242 #endif
   2243         } else {
   2244 #if 0
   2245 		printf("Failed to read RAID component label!\n");
   2246 #endif
   2247 	}
   2248 
   2249 	brelse(bp);
   2250 	return(error);
   2251 }
   2252 /* ARGSUSED */
   2253 int
   2254 raidwrite_component_label(dev, b_vp, clabel)
   2255 	dev_t dev;
   2256 	struct vnode *b_vp;
   2257 	RF_ComponentLabel_t *clabel;
   2258 {
   2259 	struct buf *bp;
   2260 	int error;
   2261 
   2262 	/* get a block of the appropriate size... */
   2263 	bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
   2264 	bp->b_dev = dev;
   2265 
   2266 	/* get our ducks in a row for the write */
   2267 	bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
   2268 	bp->b_bcount = RF_COMPONENT_INFO_SIZE;
   2269 	bp->b_flags |= B_WRITE;
   2270  	bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
   2271 
   2272 	memset(bp->b_data, 0, RF_COMPONENT_INFO_SIZE );
   2273 
   2274 	memcpy(bp->b_data, clabel, sizeof(RF_ComponentLabel_t));
   2275 
   2276 	(*bdevsw[major(bp->b_dev)].d_strategy)(bp);
   2277 	error = biowait(bp);
   2278 	brelse(bp);
   2279 	if (error) {
   2280 #if 1
   2281 		printf("Failed to write RAID component info!\n");
   2282 #endif
   2283 	}
   2284 
   2285 	return(error);
   2286 }
   2287 
   2288 void
   2289 rf_markalldirty(raidPtr)
   2290 	RF_Raid_t *raidPtr;
   2291 {
   2292 	RF_ComponentLabel_t clabel;
   2293 	int r,c;
   2294 
   2295 	raidPtr->mod_counter++;
   2296 	for (r = 0; r < raidPtr->numRow; r++) {
   2297 		for (c = 0; c < raidPtr->numCol; c++) {
   2298 			/* we don't want to touch (at all) a disk that has
   2299 			   failed */
   2300 			if (!RF_DEAD_DISK(raidPtr->Disks[r][c].status)) {
   2301 				raidread_component_label(
   2302 					raidPtr->Disks[r][c].dev,
   2303 					raidPtr->raid_cinfo[r][c].ci_vp,
   2304 					&clabel);
   2305 				if (clabel.status == rf_ds_spared) {
   2306 					/* XXX do something special...
   2307 					 but whatever you do, don't
   2308 					 try to access it!! */
   2309 				} else {
   2310 #if 0
   2311 				clabel.status =
   2312 					raidPtr->Disks[r][c].status;
   2313 				raidwrite_component_label(
   2314 					raidPtr->Disks[r][c].dev,
   2315 					raidPtr->raid_cinfo[r][c].ci_vp,
   2316 					&clabel);
   2317 #endif
   2318 				raidmarkdirty(
   2319 				       raidPtr->Disks[r][c].dev,
   2320 				       raidPtr->raid_cinfo[r][c].ci_vp,
   2321 				       raidPtr->mod_counter);
   2322 				}
   2323 			}
   2324 		}
   2325 	}
   2326 	/* printf("Component labels marked dirty.\n"); */
   2327 #if 0
   2328 	for( c = 0; c < raidPtr->numSpare ; c++) {
   2329 		sparecol = raidPtr->numCol + c;
   2330 		if (raidPtr->Disks[r][sparecol].status == rf_ds_used_spare) {
   2331 			/*
   2332 
   2333 			   XXX this is where we get fancy and map this spare
   2334 			   into it's correct spot in the array.
   2335 
   2336 			 */
   2337 			/*
   2338 
   2339 			   we claim this disk is "optimal" if it's
   2340 			   rf_ds_used_spare, as that means it should be
   2341 			   directly substitutable for the disk it replaced.
   2342 			   We note that too...
   2343 
   2344 			 */
   2345 
   2346 			for(i=0;i<raidPtr->numRow;i++) {
   2347 				for(j=0;j<raidPtr->numCol;j++) {
   2348 					if ((raidPtr->Disks[i][j].spareRow ==
   2349 					     r) &&
   2350 					    (raidPtr->Disks[i][j].spareCol ==
   2351 					     sparecol)) {
   2352 						srow = r;
   2353 						scol = sparecol;
   2354 						break;
   2355 					}
   2356 				}
   2357 			}
   2358 
   2359 			raidread_component_label(
   2360 				      raidPtr->Disks[r][sparecol].dev,
   2361 				      raidPtr->raid_cinfo[r][sparecol].ci_vp,
   2362 				      &clabel);
   2363 			/* make sure status is noted */
   2364 			clabel.version = RF_COMPONENT_LABEL_VERSION;
   2365 			clabel.mod_counter = raidPtr->mod_counter;
   2366 			clabel.serial_number = raidPtr->serial_number;
   2367 			clabel.row = srow;
   2368 			clabel.column = scol;
   2369 			clabel.num_rows = raidPtr->numRow;
   2370 			clabel.num_columns = raidPtr->numCol;
   2371 			clabel.clean = RF_RAID_DIRTY; /* changed in a bit*/
   2372 			clabel.status = rf_ds_optimal;
   2373 			raidwrite_component_label(
   2374 				      raidPtr->Disks[r][sparecol].dev,
   2375 				      raidPtr->raid_cinfo[r][sparecol].ci_vp,
   2376 				      &clabel);
   2377 			raidmarkclean( raidPtr->Disks[r][sparecol].dev,
   2378 			              raidPtr->raid_cinfo[r][sparecol].ci_vp);
   2379 		}
   2380 	}
   2381 
   2382 #endif
   2383 }
   2384 
   2385 
   2386 void
   2387 rf_update_component_labels(raidPtr, final)
   2388 	RF_Raid_t *raidPtr;
   2389 	int final;
   2390 {
   2391 	RF_ComponentLabel_t clabel;
   2392 	int sparecol;
   2393 	int r,c;
   2394 	int i,j;
   2395 	int srow, scol;
   2396 
   2397 	srow = -1;
   2398 	scol = -1;
   2399 
   2400 	/* XXX should do extra checks to make sure things really are clean,
   2401 	   rather than blindly setting the clean bit... */
   2402 
   2403 	raidPtr->mod_counter++;
   2404 
   2405 	for (r = 0; r < raidPtr->numRow; r++) {
   2406 		for (c = 0; c < raidPtr->numCol; c++) {
   2407 			if (raidPtr->Disks[r][c].status == rf_ds_optimal) {
   2408 				raidread_component_label(
   2409 					raidPtr->Disks[r][c].dev,
   2410 					raidPtr->raid_cinfo[r][c].ci_vp,
   2411 					&clabel);
   2412 				/* make sure status is noted */
   2413 				clabel.status = rf_ds_optimal;
   2414 				/* bump the counter */
   2415 				clabel.mod_counter = raidPtr->mod_counter;
   2416 
   2417 				raidwrite_component_label(
   2418 					raidPtr->Disks[r][c].dev,
   2419 					raidPtr->raid_cinfo[r][c].ci_vp,
   2420 					&clabel);
   2421 				if (final == RF_FINAL_COMPONENT_UPDATE) {
   2422 					if (raidPtr->parity_good == RF_RAID_CLEAN) {
   2423 						raidmarkclean(
   2424 							      raidPtr->Disks[r][c].dev,
   2425 							      raidPtr->raid_cinfo[r][c].ci_vp,
   2426 							      raidPtr->mod_counter);
   2427 					}
   2428 				}
   2429 			}
   2430 			/* else we don't touch it.. */
   2431 		}
   2432 	}
   2433 
   2434 	for( c = 0; c < raidPtr->numSpare ; c++) {
   2435 		sparecol = raidPtr->numCol + c;
   2436 		/* Need to ensure that the reconstruct actually completed! */
   2437 		if (raidPtr->Disks[0][sparecol].status == rf_ds_used_spare) {
   2438 			/*
   2439 
   2440 			   we claim this disk is "optimal" if it's
   2441 			   rf_ds_used_spare, as that means it should be
   2442 			   directly substitutable for the disk it replaced.
   2443 			   We note that too...
   2444 
   2445 			 */
   2446 
   2447 			for(i=0;i<raidPtr->numRow;i++) {
   2448 				for(j=0;j<raidPtr->numCol;j++) {
   2449 					if ((raidPtr->Disks[i][j].spareRow ==
   2450 					     0) &&
   2451 					    (raidPtr->Disks[i][j].spareCol ==
   2452 					     sparecol)) {
   2453 						srow = i;
   2454 						scol = j;
   2455 						break;
   2456 					}
   2457 				}
   2458 			}
   2459 
   2460 			/* XXX shouldn't *really* need this... */
   2461 			raidread_component_label(
   2462 				      raidPtr->Disks[0][sparecol].dev,
   2463 				      raidPtr->raid_cinfo[0][sparecol].ci_vp,
   2464 				      &clabel);
   2465 			/* make sure status is noted */
   2466 
   2467 			raid_init_component_label(raidPtr, &clabel);
   2468 
   2469 			clabel.mod_counter = raidPtr->mod_counter;
   2470 			clabel.row = srow;
   2471 			clabel.column = scol;
   2472 			clabel.status = rf_ds_optimal;
   2473 
   2474 			raidwrite_component_label(
   2475 				      raidPtr->Disks[0][sparecol].dev,
   2476 				      raidPtr->raid_cinfo[0][sparecol].ci_vp,
   2477 				      &clabel);
   2478 			if (final == RF_FINAL_COMPONENT_UPDATE) {
   2479 				if (raidPtr->parity_good == RF_RAID_CLEAN) {
   2480 					raidmarkclean( raidPtr->Disks[0][sparecol].dev,
   2481 						       raidPtr->raid_cinfo[0][sparecol].ci_vp,
   2482 						       raidPtr->mod_counter);
   2483 				}
   2484 			}
   2485 		}
   2486 	}
   2487 	/* 	printf("Component labels updated\n"); */
   2488 }
   2489 
   2490 void
   2491 rf_close_component(raidPtr, vp, auto_configured)
   2492 	RF_Raid_t *raidPtr;
   2493 	struct vnode *vp;
   2494 	int auto_configured;
   2495 {
   2496 	struct proc *p;
   2497 
   2498 	p = raidPtr->engine_thread;
   2499 
   2500 	if (vp != NULL) {
   2501 		if (auto_configured == 1) {
   2502 			vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
   2503 			VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
   2504 			vput(vp);
   2505 
   2506 		} else {
   2507 			(void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
   2508 		}
   2509 	} else {
   2510 #if 0
   2511 		printf("vnode was NULL\n");
   2512 #endif
   2513 	}
   2514 }
   2515 
   2516 
   2517 void
   2518 rf_UnconfigureVnodes(raidPtr)
   2519 	RF_Raid_t *raidPtr;
   2520 {
   2521 	int r,c;
   2522 	struct proc *p;
   2523 	struct vnode *vp;
   2524 	int acd;
   2525 
   2526 
   2527 	/* We take this opportunity to close the vnodes like we should.. */
   2528 
   2529 	p = raidPtr->engine_thread;
   2530 
   2531 	for (r = 0; r < raidPtr->numRow; r++) {
   2532 		for (c = 0; c < raidPtr->numCol; c++) {
   2533 #if 0
   2534 			printf("raid%d: Closing vnode for row: %d col: %d\n",
   2535 			       raidPtr->raidid, r, c);
   2536 #endif
   2537 			vp = raidPtr->raid_cinfo[r][c].ci_vp;
   2538 			acd = raidPtr->Disks[r][c].auto_configured;
   2539 			rf_close_component(raidPtr, vp, acd);
   2540 			raidPtr->raid_cinfo[r][c].ci_vp = NULL;
   2541 			raidPtr->Disks[r][c].auto_configured = 0;
   2542 		}
   2543 	}
   2544 	for (r = 0; r < raidPtr->numSpare; r++) {
   2545 #if 0
   2546 		printf("raid%d: Closing vnode for spare: %d\n",
   2547 		       raidPtr->raidid, r);
   2548 #endif
   2549 		vp = raidPtr->raid_cinfo[0][raidPtr->numCol + r].ci_vp;
   2550 		acd = raidPtr->Disks[0][raidPtr->numCol + r].auto_configured;
   2551 		rf_close_component(raidPtr, vp, acd);
   2552 		raidPtr->raid_cinfo[0][raidPtr->numCol + r].ci_vp = NULL;
   2553 		raidPtr->Disks[0][raidPtr->numCol + r].auto_configured = 0;
   2554 	}
   2555 }
   2556 
   2557 
   2558 void
   2559 rf_ReconThread(req)
   2560 	struct rf_recon_req *req;
   2561 {
   2562 	int     s;
   2563 	RF_Raid_t *raidPtr;
   2564 
   2565 	s = splbio();
   2566 	raidPtr = (RF_Raid_t *) req->raidPtr;
   2567 	raidPtr->recon_in_progress = 1;
   2568 
   2569 	rf_FailDisk((RF_Raid_t *) req->raidPtr, req->row, req->col,
   2570 		    ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));
   2571 
   2572 	/* XXX get rid of this! we don't need it at all.. */
   2573 	RF_Free(req, sizeof(*req));
   2574 
   2575 	raidPtr->recon_in_progress = 0;
   2576 	splx(s);
   2577 
   2578 	/* That's all... */
   2579 	kthread_exit(0);        /* does not return */
   2580 }
   2581 
   2582 void
   2583 rf_RewriteParityThread(raidPtr)
   2584 	RF_Raid_t *raidPtr;
   2585 {
   2586 	int retcode;
   2587 	int s;
   2588 
   2589 	raidPtr->parity_rewrite_in_progress = 1;
   2590 	s = splbio();
   2591 	retcode = rf_RewriteParity(raidPtr);
   2592 	splx(s);
   2593 	if (retcode) {
   2594 		printf("raid%d: Error re-writing parity!\n",raidPtr->raidid);
   2595 	} else {
   2596 		/* set the clean bit!  If we shutdown correctly,
   2597 		   the clean bit on each component label will get
   2598 		   set */
   2599 		raidPtr->parity_good = RF_RAID_CLEAN;
   2600 	}
   2601 	raidPtr->parity_rewrite_in_progress = 0;
   2602 
   2603 	/* Anyone waiting for us to stop?  If so, inform them... */
   2604 	if (raidPtr->waitShutdown) {
   2605 		wakeup(&raidPtr->parity_rewrite_in_progress);
   2606 	}
   2607 
   2608 	/* That's all... */
   2609 	kthread_exit(0);        /* does not return */
   2610 }
   2611 
   2612 
   2613 void
   2614 rf_CopybackThread(raidPtr)
   2615 	RF_Raid_t *raidPtr;
   2616 {
   2617 	int s;
   2618 
   2619 	raidPtr->copyback_in_progress = 1;
   2620 	s = splbio();
   2621 	rf_CopybackReconstructedData(raidPtr);
   2622 	splx(s);
   2623 	raidPtr->copyback_in_progress = 0;
   2624 
   2625 	/* That's all... */
   2626 	kthread_exit(0);        /* does not return */
   2627 }
   2628 
   2629 
   2630 void
   2631 rf_ReconstructInPlaceThread(req)
   2632 	struct rf_recon_req *req;
   2633 {
   2634 	int retcode;
   2635 	int s;
   2636 	RF_Raid_t *raidPtr;
   2637 
   2638 	s = splbio();
   2639 	raidPtr = req->raidPtr;
   2640 	raidPtr->recon_in_progress = 1;
   2641 	retcode = rf_ReconstructInPlace(raidPtr, req->row, req->col);
   2642 	RF_Free(req, sizeof(*req));
   2643 	raidPtr->recon_in_progress = 0;
   2644 	splx(s);
   2645 
   2646 	/* That's all... */
   2647 	kthread_exit(0);        /* does not return */
   2648 }
   2649 
/*
 * Mount-root hook.  Takes a device and intentionally does nothing.
 */
/* ARGSUSED */
void
rf_mountroot_hook(dev)
	struct device *dev;
{
	/* nothing to do */
}
   2656 
   2657 
   2658 RF_AutoConfig_t *
   2659 rf_find_raid_components()
   2660 {
   2661 	struct devnametobdevmaj *dtobdm;
   2662 	struct vnode *vp;
   2663 	struct disklabel label;
   2664 	struct device *dv;
   2665 	char *cd_name;
   2666 	dev_t dev;
   2667 	int error;
   2668 	int i;
   2669 	int good_one;
   2670 	RF_ComponentLabel_t *clabel;
   2671 	RF_AutoConfig_t *ac_list;
   2672 	RF_AutoConfig_t *ac;
   2673 
   2674 
   2675 	/* initialize the AutoConfig list */
   2676 	ac_list = NULL;
   2677 
   2678 	/* we begin by trolling through *all* the devices on the system */
   2679 
   2680 	for (dv = alldevs.tqh_first; dv != NULL;
   2681 	     dv = dv->dv_list.tqe_next) {
   2682 
   2683 		/* we are only interested in disks... */
   2684 		if (dv->dv_class != DV_DISK)
   2685 			continue;
   2686 
   2687 		/* we don't care about floppies... */
   2688 		if (!strcmp(dv->dv_cfdata->cf_driver->cd_name,"fd")) {
   2689 			continue;
   2690 		}
   2691 		/* hdfd is the Atari/Hades floppy driver */
   2692 		if (!strcmp(dv->dv_cfdata->cf_driver->cd_name,"hdfd")) {
   2693 			continue;
   2694 		}
   2695 		/* fdisa is the Atari/Milan floppy driver */
   2696 		if (!strcmp(dv->dv_cfdata->cf_driver->cd_name,"fdisa")) {
   2697 			continue;
   2698 		}
   2699 
   2700 		/* need to find the device_name_to_block_device_major stuff */
   2701 		cd_name = dv->dv_cfdata->cf_driver->cd_name;
   2702 		dtobdm = dev_name2blk;
   2703 		while (dtobdm->d_name && strcmp(dtobdm->d_name, cd_name)) {
   2704 			dtobdm++;
   2705 		}
   2706 
   2707 		/* get a vnode for the raw partition of this disk */
   2708 
   2709 		dev = MAKEDISKDEV(dtobdm->d_maj, dv->dv_unit, RAW_PART);
   2710 		if (bdevvp(dev, &vp))
   2711 			panic("RAID can't alloc vnode");
   2712 
   2713 		error = VOP_OPEN(vp, FREAD, NOCRED, 0);
   2714 
   2715 		if (error) {
   2716 			/* "Who cares."  Continue looking
   2717 			   for something that exists*/
   2718 			vput(vp);
   2719 			continue;
   2720 		}
   2721 
   2722 		/* Ok, the disk exists.  Go get the disklabel. */
   2723 		error = VOP_IOCTL(vp, DIOCGDINFO, (caddr_t)&label,
   2724 				  FREAD, NOCRED, 0);
   2725 		if (error) {
   2726 			/*
   2727 			 * XXX can't happen - open() would
   2728 			 * have errored out (or faked up one)
   2729 			 */
   2730 			printf("can't get label for dev %s%c (%d)!?!?\n",
   2731 			       dv->dv_xname, 'a' + RAW_PART, error);
   2732 		}
   2733 
   2734 		/* don't need this any more.  We'll allocate it again
   2735 		   a little later if we really do... */
   2736 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
   2737 		VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
   2738 		vput(vp);
   2739 
   2740 		for (i=0; i < label.d_npartitions; i++) {
   2741 			/* We only support partitions marked as RAID */
   2742 			if (label.d_partitions[i].p_fstype != FS_RAID)
   2743 				continue;
   2744 
   2745 			dev = MAKEDISKDEV(dtobdm->d_maj, dv->dv_unit, i);
   2746 			if (bdevvp(dev, &vp))
   2747 				panic("RAID can't alloc vnode");
   2748 
   2749 			error = VOP_OPEN(vp, FREAD, NOCRED, 0);
   2750 			if (error) {
   2751 				/* Whatever... */
   2752 				vput(vp);
   2753 				continue;
   2754 			}
   2755 
   2756 			good_one = 0;
   2757 
   2758 			clabel = (RF_ComponentLabel_t *)
   2759 				malloc(sizeof(RF_ComponentLabel_t),
   2760 				       M_RAIDFRAME, M_NOWAIT);
   2761 			if (clabel == NULL) {
   2762 				/* XXX CLEANUP HERE */
   2763 				printf("RAID auto config: out of memory!\n");
   2764 				return(NULL); /* XXX probably should panic? */
   2765 			}
   2766 
   2767 			if (!raidread_component_label(dev, vp, clabel)) {
   2768 				/* Got the label.  Does it look reasonable? */
   2769 				if (rf_reasonable_label(clabel) &&
   2770 				    (clabel->partitionSize <=
   2771 				     label.d_partitions[i].p_size)) {
   2772 #if DEBUG
   2773 					printf("Component on: %s%c: %d\n",
   2774 					       dv->dv_xname, 'a'+i,
   2775 					       label.d_partitions[i].p_size);
   2776 					rf_print_component_label(clabel);
   2777 #endif
   2778 					/* if it's reasonable, add it,
   2779 					   else ignore it. */
   2780 					ac = (RF_AutoConfig_t *)
   2781 						malloc(sizeof(RF_AutoConfig_t),
   2782 						       M_RAIDFRAME,
   2783 						       M_NOWAIT);
   2784 					if (ac == NULL) {
   2785 						/* XXX should panic?? */
   2786 						return(NULL);
   2787 					}
   2788 
   2789 					sprintf(ac->devname, "%s%c",
   2790 						dv->dv_xname, 'a'+i);
   2791 					ac->dev = dev;
   2792 					ac->vp = vp;
   2793 					ac->clabel = clabel;
   2794 					ac->next = ac_list;
   2795 					ac_list = ac;
   2796 					good_one = 1;
   2797 				}
   2798 			}
   2799 			if (!good_one) {
   2800 				/* cleanup */
   2801 				free(clabel, M_RAIDFRAME);
   2802 				vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
   2803 				VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
   2804 				vput(vp);
   2805 			}
   2806 		}
   2807 	}
   2808 	return(ac_list);
   2809 }
   2810 
   2811 static int
   2812 rf_reasonable_label(clabel)
   2813 	RF_ComponentLabel_t *clabel;
   2814 {
   2815 
   2816 	if (((clabel->version==RF_COMPONENT_LABEL_VERSION_1) ||
   2817 	     (clabel->version==RF_COMPONENT_LABEL_VERSION)) &&
   2818 	    ((clabel->clean == RF_RAID_CLEAN) ||
   2819 	     (clabel->clean == RF_RAID_DIRTY)) &&
   2820 	    clabel->row >=0 &&
   2821 	    clabel->column >= 0 &&
   2822 	    clabel->num_rows > 0 &&
   2823 	    clabel->num_columns > 0 &&
   2824 	    clabel->row < clabel->num_rows &&
   2825 	    clabel->column < clabel->num_columns &&
   2826 	    clabel->blockSize > 0 &&
   2827 	    clabel->numBlocks > 0) {
   2828 		/* label looks reasonable enough... */
   2829 		return(1);
   2830 	}
   2831 	return(0);
   2832 }
   2833 
   2834 
   2835 void
   2836 rf_print_component_label(clabel)
   2837 	RF_ComponentLabel_t *clabel;
   2838 {
   2839 	printf("   Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
   2840 	       clabel->row, clabel->column,
   2841 	       clabel->num_rows, clabel->num_columns);
   2842 	printf("   Version: %d Serial Number: %d Mod Counter: %d\n",
   2843 	       clabel->version, clabel->serial_number,
   2844 	       clabel->mod_counter);
   2845 	printf("   Clean: %s Status: %d\n",
   2846 	       clabel->clean ? "Yes" : "No", clabel->status );
   2847 	printf("   sectPerSU: %d SUsPerPU: %d SUsPerRU: %d\n",
   2848 	       clabel->sectPerSU, clabel->SUsPerPU, clabel->SUsPerRU);
   2849 	printf("   RAID Level: %c  blocksize: %d numBlocks: %d\n",
   2850 	       (char) clabel->parityConfig, clabel->blockSize,
   2851 	       clabel->numBlocks);
   2852 	printf("   Autoconfig: %s\n", clabel->autoconfigure ? "Yes" : "No" );
   2853 	printf("   Contains root partition: %s\n",
   2854 	       clabel->root_partition ? "Yes" : "No" );
   2855 	printf("   Last configured as: raid%d\n", clabel->last_unit );
   2856 #if 0
   2857 	   printf("   Config order: %d\n", clabel->config_order);
   2858 #endif
   2859 
   2860 }
   2861 
   2862 RF_ConfigSet_t *
   2863 rf_create_auto_sets(ac_list)
   2864 	RF_AutoConfig_t *ac_list;
   2865 {
   2866 	RF_AutoConfig_t *ac;
   2867 	RF_ConfigSet_t *config_sets;
   2868 	RF_ConfigSet_t *cset;
   2869 	RF_AutoConfig_t *ac_next;
   2870 
   2871 
   2872 	config_sets = NULL;
   2873 
   2874 	/* Go through the AutoConfig list, and figure out which components
   2875 	   belong to what sets.  */
   2876 	ac = ac_list;
   2877 	while(ac!=NULL) {
   2878 		/* we're going to putz with ac->next, so save it here
   2879 		   for use at the end of the loop */
   2880 		ac_next = ac->next;
   2881 
   2882 		if (config_sets == NULL) {
   2883 			/* will need at least this one... */
   2884 			config_sets = (RF_ConfigSet_t *)
   2885 				malloc(sizeof(RF_ConfigSet_t),
   2886 				       M_RAIDFRAME, M_NOWAIT);
   2887 			if (config_sets == NULL) {
   2888 				panic("rf_create_auto_sets: No memory!\n");
   2889 			}
   2890 			/* this one is easy :) */
   2891 			config_sets->ac = ac;
   2892 			config_sets->next = NULL;
   2893 			config_sets->rootable = 0;
   2894 			ac->next = NULL;
   2895 		} else {
   2896 			/* which set does this component fit into? */
   2897 			cset = config_sets;
   2898 			while(cset!=NULL) {
   2899 				if (rf_does_it_fit(cset, ac)) {
   2900 					/* looks like it matches... */
   2901 					ac->next = cset->ac;
   2902 					cset->ac = ac;
   2903 					break;
   2904 				}
   2905 				cset = cset->next;
   2906 			}
   2907 			if (cset==NULL) {
   2908 				/* didn't find a match above... new set..*/
   2909 				cset = (RF_ConfigSet_t *)
   2910 					malloc(sizeof(RF_ConfigSet_t),
   2911 					       M_RAIDFRAME, M_NOWAIT);
   2912 				if (cset == NULL) {
   2913 					panic("rf_create_auto_sets: No memory!\n");
   2914 				}
   2915 				cset->ac = ac;
   2916 				ac->next = NULL;
   2917 				cset->next = config_sets;
   2918 				cset->rootable = 0;
   2919 				config_sets = cset;
   2920 			}
   2921 		}
   2922 		ac = ac_next;
   2923 	}
   2924 
   2925 
   2926 	return(config_sets);
   2927 }
   2928 
   2929 static int
   2930 rf_does_it_fit(cset, ac)
   2931 	RF_ConfigSet_t *cset;
   2932 	RF_AutoConfig_t *ac;
   2933 {
   2934 	RF_ComponentLabel_t *clabel1, *clabel2;
   2935 
   2936 	/* If this one matches the *first* one in the set, that's good
   2937 	   enough, since the other members of the set would have been
   2938 	   through here too... */
   2939 	/* note that we are not checking partitionSize here..
   2940 
   2941 	   Note that we are also not checking the mod_counters here.
   2942 	   If everything else matches execpt the mod_counter, that's
   2943 	   good enough for this test.  We will deal with the mod_counters
   2944 	   a little later in the autoconfiguration process.
   2945 
   2946 	    (clabel1->mod_counter == clabel2->mod_counter) &&
   2947 
   2948 	   The reason we don't check for this is that failed disks
   2949 	   will have lower modification counts.  If those disks are
   2950 	   not added to the set they used to belong to, then they will
   2951 	   form their own set, which may result in 2 different sets,
   2952 	   for example, competing to be configured at raid0, and
   2953 	   perhaps competing to be the root filesystem set.  If the
   2954 	   wrong ones get configured, or both attempt to become /,
   2955 	   weird behaviour and or serious lossage will occur.  Thus we
   2956 	   need to bring them into the fold here, and kick them out at
   2957 	   a later point.
   2958 
   2959 	*/
   2960 
   2961 	clabel1 = cset->ac->clabel;
   2962 	clabel2 = ac->clabel;
   2963 	if ((clabel1->version == clabel2->version) &&
   2964 	    (clabel1->serial_number == clabel2->serial_number) &&
   2965 	    (clabel1->num_rows == clabel2->num_rows) &&
   2966 	    (clabel1->num_columns == clabel2->num_columns) &&
   2967 	    (clabel1->sectPerSU == clabel2->sectPerSU) &&
   2968 	    (clabel1->SUsPerPU == clabel2->SUsPerPU) &&
   2969 	    (clabel1->SUsPerRU == clabel2->SUsPerRU) &&
   2970 	    (clabel1->parityConfig == clabel2->parityConfig) &&
   2971 	    (clabel1->maxOutstanding == clabel2->maxOutstanding) &&
   2972 	    (clabel1->blockSize == clabel2->blockSize) &&
   2973 	    (clabel1->numBlocks == clabel2->numBlocks) &&
   2974 	    (clabel1->autoconfigure == clabel2->autoconfigure) &&
   2975 	    (clabel1->root_partition == clabel2->root_partition) &&
   2976 	    (clabel1->last_unit == clabel2->last_unit) &&
   2977 	    (clabel1->config_order == clabel2->config_order)) {
   2978 		/* if it get's here, it almost *has* to be a match */
   2979 	} else {
   2980 		/* it's not consistent with somebody in the set..
   2981 		   punt */
   2982 		return(0);
   2983 	}
   2984 	/* all was fine.. it must fit... */
   2985 	return(1);
   2986 }
   2987 
   2988 int
   2989 rf_have_enough_components(cset)
   2990 	RF_ConfigSet_t *cset;
   2991 {
   2992 	RF_AutoConfig_t *ac;
   2993 	RF_AutoConfig_t *auto_config;
   2994 	RF_ComponentLabel_t *clabel;
   2995 	int r,c;
   2996 	int num_rows;
   2997 	int num_cols;
   2998 	int num_missing;
   2999 	int mod_counter;
   3000 	int mod_counter_found;
   3001 	int even_pair_failed;
   3002 	char parity_type;
   3003 
   3004 
   3005 	/* check to see that we have enough 'live' components
   3006 	   of this set.  If so, we can configure it if necessary */
   3007 
   3008 	num_rows = cset->ac->clabel->num_rows;
   3009 	num_cols = cset->ac->clabel->num_columns;
   3010 	parity_type = cset->ac->clabel->parityConfig;
   3011 
   3012 	/* XXX Check for duplicate components!?!?!? */
   3013 
   3014 	/* Determine what the mod_counter is supposed to be for this set. */
   3015 
   3016 	mod_counter_found = 0;
   3017 	mod_counter = 0;
   3018 	ac = cset->ac;
   3019 	while(ac!=NULL) {
   3020 		if (mod_counter_found==0) {
   3021 			mod_counter = ac->clabel->mod_counter;
   3022 			mod_counter_found = 1;
   3023 		} else {
   3024 			if (ac->clabel->mod_counter > mod_counter) {
   3025 				mod_counter = ac->clabel->mod_counter;
   3026 			}
   3027 		}
   3028 		ac = ac->next;
   3029 	}
   3030 
   3031 	num_missing = 0;
   3032 	auto_config = cset->ac;
   3033 
   3034 	for(r=0; r<num_rows; r++) {
   3035 		even_pair_failed = 0;
   3036 		for(c=0; c<num_cols; c++) {
   3037 			ac = auto_config;
   3038 			while(ac!=NULL) {
   3039 				if ((ac->clabel->row == r) &&
   3040 				    (ac->clabel->column == c) &&
   3041 				    (ac->clabel->mod_counter == mod_counter)) {
   3042 					/* it's this one... */
   3043 #if DEBUG
   3044 					printf("Found: %s at %d,%d\n",
   3045 					       ac->devname,r,c);
   3046 #endif
   3047 					break;
   3048 				}
   3049 				ac=ac->next;
   3050 			}
   3051 			if (ac==NULL) {
   3052 				/* Didn't find one here! */
   3053 				/* special case for RAID 1, especially
   3054 				   where there are more than 2
   3055 				   components (where RAIDframe treats
   3056 				   things a little differently :( ) */
   3057 				if (parity_type == '1') {
   3058 					if (c%2 == 0) { /* even component */
   3059 						even_pair_failed = 1;
   3060 					} else { /* odd component.  If
   3061                                                     we're failed, and
   3062                                                     so is the even
   3063                                                     component, it's
   3064                                                     "Good Night, Charlie" */
   3065 						if (even_pair_failed == 1) {
   3066 							return(0);
   3067 						}
   3068 					}
   3069 				} else {
   3070 					/* normal accounting */
   3071 					num_missing++;
   3072 				}
   3073 			}
   3074 			if ((parity_type == '1') && (c%2 == 1)) {
   3075 				/* Just did an even component, and we didn't
   3076 				   bail.. reset the even_pair_failed flag,
   3077 				   and go on to the next component.... */
   3078 				even_pair_failed = 0;
   3079 			}
   3080 		}
   3081 	}
   3082 
   3083 	clabel = cset->ac->clabel;
   3084 
   3085 	if (((clabel->parityConfig == '0') && (num_missing > 0)) ||
   3086 	    ((clabel->parityConfig == '4') && (num_missing > 1)) ||
   3087 	    ((clabel->parityConfig == '5') && (num_missing > 1))) {
   3088 		/* XXX this needs to be made *much* more general */
   3089 		/* Too many failures */
   3090 		return(0);
   3091 	}
   3092 	/* otherwise, all is well, and we've got enough to take a kick
   3093 	   at autoconfiguring this set */
   3094 	return(1);
   3095 }
   3096 
   3097 void
   3098 rf_create_configuration(ac,config,raidPtr)
   3099 	RF_AutoConfig_t *ac;
   3100 	RF_Config_t *config;
   3101 	RF_Raid_t *raidPtr;
   3102 {
   3103 	RF_ComponentLabel_t *clabel;
   3104 	int i;
   3105 
   3106 	clabel = ac->clabel;
   3107 
   3108 	/* 1. Fill in the common stuff */
   3109 	config->numRow = clabel->num_rows;
   3110 	config->numCol = clabel->num_columns;
   3111 	config->numSpare = 0; /* XXX should this be set here? */
   3112 	config->sectPerSU = clabel->sectPerSU;
   3113 	config->SUsPerPU = clabel->SUsPerPU;
   3114 	config->SUsPerRU = clabel->SUsPerRU;
   3115 	config->parityConfig = clabel->parityConfig;
   3116 	/* XXX... */
   3117 	strcpy(config->diskQueueType,"fifo");
   3118 	config->maxOutstandingDiskReqs = clabel->maxOutstanding;
   3119 	config->layoutSpecificSize = 0; /* XXX ?? */
   3120 
   3121 	while(ac!=NULL) {
   3122 		/* row/col values will be in range due to the checks
   3123 		   in reasonable_label() */
   3124 		strcpy(config->devnames[ac->clabel->row][ac->clabel->column],
   3125 		       ac->devname);
   3126 		ac = ac->next;
   3127 	}
   3128 
   3129 	for(i=0;i<RF_MAXDBGV;i++) {
   3130 		config->debugVars[i][0] = NULL;
   3131 	}
   3132 }
   3133 
   3134 int
   3135 rf_set_autoconfig(raidPtr, new_value)
   3136 	RF_Raid_t *raidPtr;
   3137 	int new_value;
   3138 {
   3139 	RF_ComponentLabel_t clabel;
   3140 	struct vnode *vp;
   3141 	dev_t dev;
   3142 	int row, column;
   3143 
   3144 	raidPtr->autoconfigure = new_value;
   3145 	for(row=0; row<raidPtr->numRow; row++) {
   3146 		for(column=0; column<raidPtr->numCol; column++) {
   3147 			if (raidPtr->Disks[row][column].status ==
   3148 			    rf_ds_optimal) {
   3149 				dev = raidPtr->Disks[row][column].dev;
   3150 				vp = raidPtr->raid_cinfo[row][column].ci_vp;
   3151 				raidread_component_label(dev, vp, &clabel);
   3152 				clabel.autoconfigure = new_value;
   3153 				raidwrite_component_label(dev, vp, &clabel);
   3154 			}
   3155 		}
   3156 	}
   3157 	return(new_value);
   3158 }
   3159 
   3160 int
   3161 rf_set_rootpartition(raidPtr, new_value)
   3162 	RF_Raid_t *raidPtr;
   3163 	int new_value;
   3164 {
   3165 	RF_ComponentLabel_t clabel;
   3166 	struct vnode *vp;
   3167 	dev_t dev;
   3168 	int row, column;
   3169 
   3170 	raidPtr->root_partition = new_value;
   3171 	for(row=0; row<raidPtr->numRow; row++) {
   3172 		for(column=0; column<raidPtr->numCol; column++) {
   3173 			if (raidPtr->Disks[row][column].status ==
   3174 			    rf_ds_optimal) {
   3175 				dev = raidPtr->Disks[row][column].dev;
   3176 				vp = raidPtr->raid_cinfo[row][column].ci_vp;
   3177 				raidread_component_label(dev, vp, &clabel);
   3178 				clabel.root_partition = new_value;
   3179 				raidwrite_component_label(dev, vp, &clabel);
   3180 			}
   3181 		}
   3182 	}
   3183 	return(new_value);
   3184 }
   3185 
   3186 void
   3187 rf_release_all_vps(cset)
   3188 	RF_ConfigSet_t *cset;
   3189 {
   3190 	RF_AutoConfig_t *ac;
   3191 
   3192 	ac = cset->ac;
   3193 	while(ac!=NULL) {
   3194 		/* Close the vp, and give it back */
   3195 		if (ac->vp) {
   3196 			vn_lock(ac->vp, LK_EXCLUSIVE | LK_RETRY);
   3197 			VOP_CLOSE(ac->vp, FREAD, NOCRED, 0);
   3198 			vput(ac->vp);
   3199 			ac->vp = NULL;
   3200 		}
   3201 		ac = ac->next;
   3202 	}
   3203 }
   3204 
   3205 
   3206 void
   3207 rf_cleanup_config_set(cset)
   3208 	RF_ConfigSet_t *cset;
   3209 {
   3210 	RF_AutoConfig_t *ac;
   3211 	RF_AutoConfig_t *next_ac;
   3212 
   3213 	ac = cset->ac;
   3214 	while(ac!=NULL) {
   3215 		next_ac = ac->next;
   3216 		/* nuke the label */
   3217 		free(ac->clabel, M_RAIDFRAME);
   3218 		/* cleanup the config structure */
   3219 		free(ac, M_RAIDFRAME);
   3220 		/* "next.." */
   3221 		ac = next_ac;
   3222 	}
   3223 	/* and, finally, nuke the config set */
   3224 	free(cset, M_RAIDFRAME);
   3225 }
   3226 
   3227 
   3228 void
   3229 raid_init_component_label(raidPtr, clabel)
   3230 	RF_Raid_t *raidPtr;
   3231 	RF_ComponentLabel_t *clabel;
   3232 {
   3233 	/* current version number */
   3234 	clabel->version = RF_COMPONENT_LABEL_VERSION;
   3235 	clabel->serial_number = raidPtr->serial_number;
   3236 	clabel->mod_counter = raidPtr->mod_counter;
   3237 	clabel->num_rows = raidPtr->numRow;
   3238 	clabel->num_columns = raidPtr->numCol;
   3239 	clabel->clean = RF_RAID_DIRTY; /* not clean */
   3240 	clabel->status = rf_ds_optimal; /* "It's good!" */
   3241 
   3242 	clabel->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit;
   3243 	clabel->SUsPerPU = raidPtr->Layout.SUsPerPU;
   3244 	clabel->SUsPerRU = raidPtr->Layout.SUsPerRU;
   3245 
   3246 	clabel->blockSize = raidPtr->bytesPerSector;
   3247 	clabel->numBlocks = raidPtr->sectorsPerDisk;
   3248 
   3249 	/* XXX not portable */
   3250 	clabel->parityConfig = raidPtr->Layout.map->parityConfig;
   3251 	clabel->maxOutstanding = raidPtr->maxOutstanding;
   3252 	clabel->autoconfigure = raidPtr->autoconfigure;
   3253 	clabel->root_partition = raidPtr->root_partition;
   3254 	clabel->last_unit = raidPtr->raidid;
   3255 	clabel->config_order = raidPtr->config_order;
   3256 }
   3257 
   3258 int
   3259 rf_auto_config_set(cset,unit)
   3260 	RF_ConfigSet_t *cset;
   3261 	int *unit;
   3262 {
   3263 	RF_Raid_t *raidPtr;
   3264 	RF_Config_t *config;
   3265 	int raidID;
   3266 	int retcode;
   3267 
   3268 #if DEBUG
   3269 	printf("RAID autoconfigure\n");
   3270 #endif
   3271 
   3272 	retcode = 0;
   3273 	*unit = -1;
   3274 
   3275 	/* 1. Create a config structure */
   3276 
   3277 	config = (RF_Config_t *)malloc(sizeof(RF_Config_t),
   3278 				       M_RAIDFRAME,
   3279 				       M_NOWAIT);
   3280 	if (config==NULL) {
   3281 		printf("Out of mem!?!?\n");
   3282 				/* XXX do something more intelligent here. */
   3283 		return(1);
   3284 	}
   3285 
   3286 	memset(config, 0, sizeof(RF_Config_t));
   3287 
   3288 	/* XXX raidID needs to be set correctly.. */
   3289 
   3290 	/*
   3291 	   2. Figure out what RAID ID this one is supposed to live at
   3292 	   See if we can get the same RAID dev that it was configured
   3293 	   on last time..
   3294 	*/
   3295 
   3296 	raidID = cset->ac->clabel->last_unit;
   3297 	if ((raidID < 0) || (raidID >= numraid)) {
   3298 		/* let's not wander off into lala land. */
   3299 		raidID = numraid - 1;
   3300 	}
   3301 	if (raidPtrs[raidID]->valid != 0) {
   3302 
   3303 		/*
   3304 		   Nope... Go looking for an alternative...
   3305 		   Start high so we don't immediately use raid0 if that's
   3306 		   not taken.
   3307 		*/
   3308 
   3309 		for(raidID = numraid - 1; raidID >= 0; raidID--) {
   3310 			if (raidPtrs[raidID]->valid == 0) {
   3311 				/* can use this one! */
   3312 				break;
   3313 			}
   3314 		}
   3315 	}
   3316 
   3317 	if (raidID < 0) {
   3318 		/* punt... */
   3319 		printf("Unable to auto configure this set!\n");
   3320 		printf("(Out of RAID devs!)\n");
   3321 		return(1);
   3322 	}
   3323 
   3324 #if DEBUG
   3325 	printf("Configuring raid%d:\n",raidID);
   3326 #endif
   3327 
   3328 	raidPtr = raidPtrs[raidID];
   3329 
   3330 	/* XXX all this stuff should be done SOMEWHERE ELSE! */
   3331 	raidPtr->raidid = raidID;
   3332 	raidPtr->openings = RAIDOUTSTANDING;
   3333 
   3334 	/* 3. Build the configuration structure */
   3335 	rf_create_configuration(cset->ac, config, raidPtr);
   3336 
   3337 	/* 4. Do the configuration */
   3338 	retcode = rf_Configure(raidPtr, config, cset->ac);
   3339 
   3340 	if (retcode == 0) {
   3341 
   3342 		raidinit(raidPtrs[raidID]);
   3343 
   3344 		rf_markalldirty(raidPtrs[raidID]);
   3345 		raidPtrs[raidID]->autoconfigure = 1; /* XXX do this here? */
   3346 		if (cset->ac->clabel->root_partition==1) {
   3347 			/* everything configured just fine.  Make a note
   3348 			   that this set is eligible to be root. */
   3349 			cset->rootable = 1;
   3350 			/* XXX do this here? */
   3351 			raidPtrs[raidID]->root_partition = 1;
   3352 		}
   3353 	}
   3354 
   3355 	/* 5. Cleanup */
   3356 	free(config, M_RAIDFRAME);
   3357 
   3358 	*unit = raidID;
   3359 	return(retcode);
   3360 }
   3361 
   3362 void
   3363 rf_disk_unbusy(desc)
   3364 	RF_RaidAccessDesc_t *desc;
   3365 {
   3366 	struct buf *bp;
   3367 
   3368 	bp = (struct buf *)desc->bp;
   3369 	disk_unbusy(&raid_softc[desc->raidPtr->raidid].sc_dkdev,
   3370 			    (bp->b_bcount - bp->b_resid));
   3371 }
   3372