Home | History | Annotate | Line # | Download | only in raidframe
rf_netbsdkintf.c revision 1.60
      1 /*	$NetBSD: rf_netbsdkintf.c,v 1.60 2000/02/25 19:56:56 oster Exp $	*/
      2 /*-
      3  * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc.
      4  * All rights reserved.
      5  *
      6  * This code is derived from software contributed to The NetBSD Foundation
      7  * by Greg Oster; Jason R. Thorpe.
      8  *
      9  * Redistribution and use in source and binary forms, with or without
     10  * modification, are permitted provided that the following conditions
     11  * are met:
     12  * 1. Redistributions of source code must retain the above copyright
     13  *    notice, this list of conditions and the following disclaimer.
     14  * 2. Redistributions in binary form must reproduce the above copyright
     15  *    notice, this list of conditions and the following disclaimer in the
     16  *    documentation and/or other materials provided with the distribution.
     17  * 3. All advertising materials mentioning features or use of this software
     18  *    must display the following acknowledgement:
     19  *        This product includes software developed by the NetBSD
     20  *        Foundation, Inc. and its contributors.
     21  * 4. Neither the name of The NetBSD Foundation nor the names of its
     22  *    contributors may be used to endorse or promote products derived
     23  *    from this software without specific prior written permission.
     24  *
     25  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     26  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     27  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     28  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     29  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     30  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     31  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     32  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     33  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     34  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     35  * POSSIBILITY OF SUCH DAMAGE.
     36  */
     37 
     38 /*
     39  * Copyright (c) 1988 University of Utah.
     40  * Copyright (c) 1990, 1993
     41  *      The Regents of the University of California.  All rights reserved.
     42  *
     43  * This code is derived from software contributed to Berkeley by
     44  * the Systems Programming Group of the University of Utah Computer
     45  * Science Department.
     46  *
     47  * Redistribution and use in source and binary forms, with or without
     48  * modification, are permitted provided that the following conditions
     49  * are met:
     50  * 1. Redistributions of source code must retain the above copyright
     51  *    notice, this list of conditions and the following disclaimer.
     52  * 2. Redistributions in binary form must reproduce the above copyright
     53  *    notice, this list of conditions and the following disclaimer in the
     54  *    documentation and/or other materials provided with the distribution.
     55  * 3. All advertising materials mentioning features or use of this software
     56  *    must display the following acknowledgement:
     57  *      This product includes software developed by the University of
     58  *      California, Berkeley and its contributors.
     59  * 4. Neither the name of the University nor the names of its contributors
     60  *    may be used to endorse or promote products derived from this software
     61  *    without specific prior written permission.
     62  *
     63  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     64  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     65  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     66  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     67  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     68  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     69  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     70  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     71  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     72  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     73  * SUCH DAMAGE.
     74  *
     75  * from: Utah $Hdr: cd.c 1.6 90/11/28$
     76  *
     77  *      @(#)cd.c        8.2 (Berkeley) 11/16/93
     78  */
     79 
     80 
     81 
     82 
     83 /*
     84  * Copyright (c) 1995 Carnegie-Mellon University.
     85  * All rights reserved.
     86  *
     87  * Authors: Mark Holland, Jim Zelenka
     88  *
     89  * Permission to use, copy, modify and distribute this software and
     90  * its documentation is hereby granted, provided that both the copyright
     91  * notice and this permission notice appear in all copies of the
     92  * software, derivative works or modified versions, and any portions
     93  * thereof, and that both notices appear in supporting documentation.
     94  *
     95  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
     96  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
     97  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
     98  *
     99  * Carnegie Mellon requests users of this software to return to
    100  *
    101  *  Software Distribution Coordinator  or  Software.Distribution (at) CS.CMU.EDU
    102  *  School of Computer Science
    103  *  Carnegie Mellon University
    104  *  Pittsburgh PA 15213-3890
    105  *
    106  * any improvements or extensions that they make and grant Carnegie the
    107  * rights to redistribute these changes.
    108  */
    109 
    110 /***********************************************************
    111  *
    112  * rf_kintf.c -- the kernel interface routines for RAIDframe
    113  *
    114  ***********************************************************/
    115 
    116 #include <sys/errno.h>
    117 #include <sys/param.h>
    118 #include <sys/pool.h>
    119 #include <sys/queue.h>
    120 #include <sys/disk.h>
    121 #include <sys/device.h>
    122 #include <sys/stat.h>
    123 #include <sys/ioctl.h>
    124 #include <sys/fcntl.h>
    125 #include <sys/systm.h>
    126 #include <sys/namei.h>
    127 #include <sys/vnode.h>
    128 #include <sys/param.h>
    129 #include <sys/types.h>
    130 #include <machine/types.h>
    131 #include <sys/disklabel.h>
    132 #include <sys/conf.h>
    133 #include <sys/lock.h>
    134 #include <sys/buf.h>
    135 #include <sys/user.h>
    136 
    137 #include "raid.h"
    138 #include "rf_raid.h"
    139 #include "rf_raidframe.h"
    140 #include "rf_copyback.h"
    141 #include "rf_dag.h"
    142 #include "rf_dagflags.h"
    143 #include "rf_diskqueue.h"
    144 #include "rf_acctrace.h"
    145 #include "rf_etimer.h"
    146 #include "rf_general.h"
    147 #include "rf_debugMem.h"
    148 #include "rf_kintf.h"
    149 #include "rf_options.h"
    150 #include "rf_driver.h"
    151 #include "rf_parityscan.h"
    152 #include "rf_debugprint.h"
    153 #include "rf_threadstuff.h"
    154 
/* Debug verbosity for this file; db1_printf() only prints when it is > 0. */
int     rf_kdebug_level = 0;

/*
 * db1_printf((fmt, args...)) -- level-1 debug printf.
 *
 * The argument is a complete, parenthesized printf() argument list.
 * Both variants expand to a single do { } while (0) statement, so the
 * macro behaves like an ordinary statement: it needs its trailing ';'
 * and can sit in an un-braced if/else body without capturing the else
 * (DEBUG case) or breaking the syntax (non-DEBUG case).
 */
#ifdef DEBUG
#define db1_printf(a) do { if (rf_kdebug_level > 0) printf a; } while (0)
#else				/* DEBUG */
#define db1_printf(a) do { } while (0)
#endif				/* DEBUG */
    162 
/*
 * Array of per-unit RAID descriptors, indexed by unit number.  The array
 * and every entry are allocated in raidattach().
 */
static RF_Raid_t **raidPtrs;	/* global raid device descriptors */

/*
 * Initialized with rf_mutex_init() in raidattach().
 * NOTE(review): presumably guards the two spare-table queues below --
 * confirm against the code that uses them.
 */
RF_DECLARE_STATIC_MUTEX(rf_sparet_wait_mutex)

/* Both queue heads are reset to NULL in raidattach(). */
static RF_SparetWait_t *rf_sparet_wait_queue;	/* requests to install a
						 * spare table */
static RF_SparetWait_t *rf_sparet_resp_queue;	/* responses from
						 * installation process */
    171 
    172 /* prototypes */
/* file-local helpers */
static void KernelWakeupFunc(struct buf * bp);
static void InitBP(struct buf * bp, struct vnode *, unsigned rw_flag,
		   dev_t dev, RF_SectorNum_t startSect,
		   RF_SectorCount_t numSect, caddr_t buf,
		   void (*cbFunc) (struct buf *), void *cbArg,
		   int logBytesPerSector, struct proc * b_proc);
static void raidinit __P((RF_Raid_t *));

/* externally visible driver entry points defined in this file */
void raidattach __P((int));
int raidsize __P((dev_t));
int raidopen __P((dev_t, int, int, struct proc *));
int raidclose __P((dev_t, int, int, struct proc *));
int raidioctl __P((dev_t, u_long, caddr_t, int, struct proc *));
int raidwrite __P((dev_t, struct uio *, int));
int raidread __P((dev_t, struct uio *, int));
void raidstrategy __P((struct buf *));
int raiddump __P((dev_t, daddr_t, caddr_t, size_t));
    190 
    191 /*
    192  * Pilfered from ccd.c
    193  */
    194 
/*
 * Per-component I/O descriptor: a struct buf plus the bookkeeping needed
 * to tie it back to the original request.
 * NOTE(review): rf_buf presumably has to be the first member so that a
 * struct buf pointer can be converted back to the enclosing struct
 * raidbuf -- confirm against KernelWakeupFunc().
 */
struct raidbuf {
	struct buf rf_buf;	/* new I/O buf.  MUST BE FIRST!!! */
	struct buf *rf_obp;	/* ptr. to original I/O buf */
	int     rf_flags;	/* misc. flags */
	RF_DiskQueueData_t *req;/* the request that this was part of.. */
};


/*
 * Get/return a struct raidbuf from the unit's component buffer pool
 * (rs is a struct raid_softc *).  RAIDGETBUF passes PR_NOWAIT to
 * pool_get(); NOTE(review): presumably this can yield NULL, so callers
 * should check -- confirm against pool(9).
 */
#define RAIDGETBUF(rs) pool_get(&(rs)->sc_cbufpool, PR_NOWAIT)
#define	RAIDPUTBUF(rs, cbp) pool_put(&(rs)->sc_cbufpool, cbp)
    205 
    206 /* XXX Not sure if the following should be replacing the raidPtrs above,
    207    or if it should be used in conjunction with that...
    208 */
    209 
/*
 * Per-unit driver state.  One element per unit lives in the raid_softc[]
 * array allocated in raidattach(); entry points index it with
 * raidunit(dev) after checking the unit against numraid.
 */
struct raid_softc {
	int     sc_flags;	/* flags */
	int     sc_cflags;	/* configuration flags */
	size_t  sc_size;        /* size of the raid device */
	char    sc_xname[20];	/* XXX external name */
	struct disk sc_dkdev;	/* generic disk device info */
	struct pool sc_cbufpool;	/* component buffer pool */
	struct buf_queue buf_queue;	/* used for the device queue */
};
/* sc_flags */
#define RAIDF_INITED	0x01	/* unit has been initialized */
#define RAIDF_WLABEL	0x02	/* label area is writable */
#define RAIDF_LABELLING	0x04	/* unit is currently being labelled */
#define RAIDF_WANTED	0x40	/* someone is waiting to obtain a lock */
#define RAIDF_LOCKED	0x80	/* unit is locked */

/* unit number encoded in a dev_t */
#define	raidunit(x)	DISKUNIT(x)
/* number of usable units; set (and possibly trimmed) by raidattach() */
int numraid = 0;
    228 
    229 /*
    230  * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
    231  * Be aware that large numbers can allow the driver to consume a lot of
    232  * kernel memory, especially on writes, and in degraded mode reads.
    233  *
    234  * For example: with a stripe width of 64 blocks (32k) and 5 disks,
    235  * a single 64K write will typically require 64K for the old data,
    236  * 64K for the old parity, and 64K for the new parity, for a total
    237  * of 192K (if the parity buffer is not re-used immediately).
    238  * Even if it is used immediately, that's still 128K, which when multiplied
    239  * by say 10 requests, is 1280K, *on top* of the 640K of incoming data.
    240  *
    241  * Now in degraded mode, for example, a 64K read on the above setup may
    242  * require data reconstruction, which will require *all* of the 4 remaining
    243  * disks to participate -- 4 * 32K/disk == 128K again.
    244  */
    245 
/*
 * Default number of simultaneous I/Os per RAID device; may be overridden
 * at build time.  Assigned to raidPtr->openings when a set is configured
 * through RAIDFRAME_CONFIGURE.
 */
#ifndef RAIDOUTSTANDING
#define RAIDOUTSTANDING   6
#endif

/* dev_t naming the RAW_PART partition of the same major/unit as `dev' */
#define RAIDLABELDEV(dev)	\
	(MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))
    252 
    253 /* declared here, and made public, for the benefit of KVM stuff.. */
/* Array of numraid per-unit softc structures, allocated in raidattach(). */
struct raid_softc *raid_softc;

/* disklabel helpers */
static void raidgetdefaultlabel __P((RF_Raid_t *, struct raid_softc *,
				     struct disklabel *));
static void raidgetdisklabel __P((dev_t));
static void raidmakedisklabel __P((struct raid_softc *));

/* per-unit lock taken around open/close/shutdown */
static int raidlock __P((struct raid_softc *));
static void raidunlock __P((struct raid_softc *));

static void rf_markalldirty __P((RF_Raid_t *));
void rf_mountroot_hook __P((struct device *));

/*
 * raidrootdev is an array with one struct device per unit, filled in by
 * raidattach(); rf_buildroothack() may point booted_device at one of its
 * elements.
 */
struct device *raidrootdev;
struct cfdata cf_raidrootdev;
struct cfdriver cfdrv;
/* XXX these should be moved up */
#include "rf_configure.h"
#include <sys/reboot.h>

/* thread bodies for long-running operations */
void rf_ReconThread __P((struct rf_recon_req *));
/* XXX what I want is: */
/*void rf_ReconThread __P((RF_Raid_t *raidPtr));  */
void rf_RewriteParityThread __P((RF_Raid_t *raidPtr));
void rf_CopybackThread __P((RF_Raid_t *raidPtr));
void rf_ReconstructInPlaceThread __P((struct rf_recon_req *));
void rf_buildroothack __P((void *));

/* autoconfiguration support */
RF_AutoConfig_t *rf_find_raid_components __P((void));
void print_component_label __P((RF_ComponentLabel_t *));
RF_ConfigSet_t *rf_create_auto_sets __P((RF_AutoConfig_t *));
static int rf_does_it_fit __P((RF_ConfigSet_t *,RF_AutoConfig_t *));
static int rf_reasonable_label __P((RF_ComponentLabel_t *));
void rf_create_configuration __P((RF_AutoConfig_t *,RF_Config_t *,
				  RF_Raid_t *));
int rf_set_autoconfig __P((RF_Raid_t *, int));
int rf_set_rootpartition __P((RF_Raid_t *, int));
void rf_release_all_vps __P((RF_ConfigSet_t *));
void rf_cleanup_config_set __P((RF_ConfigSet_t *));
int rf_have_enough_components __P((RF_ConfigSet_t *));
int rf_auto_config_set __P((RF_ConfigSet_t *, int *));

/* raidattach() only runs the autoconfiguration steps when this is non-zero */
static int raidautoconfig = 0; /* Debugging, mostly.  Set to 0 to not
				  allow autoconfig to take place */
extern struct device *booted_device;
    299 
    300 void
    301 raidattach(num)
    302 	int     num;
    303 {
    304 	int raidID;
    305 	int i, rc;
    306 	RF_AutoConfig_t *ac_list; /* autoconfig list */
    307 	RF_ConfigSet_t *config_sets;
    308 
    309 #ifdef DEBUG
    310 	printf("raidattach: Asked for %d units\n", num);
    311 #endif
    312 
    313 	if (num <= 0) {
    314 #ifdef DIAGNOSTIC
    315 		panic("raidattach: count <= 0");
    316 #endif
    317 		return;
    318 	}
    319 	/* This is where all the initialization stuff gets done. */
    320 
    321 	numraid = num;
    322 
    323 	/* Make some space for requested number of units... */
    324 
    325 	RF_Calloc(raidPtrs, num, sizeof(RF_Raid_t *), (RF_Raid_t **));
    326 	if (raidPtrs == NULL) {
    327 		panic("raidPtrs is NULL!!\n");
    328 	}
    329 
    330 	rc = rf_mutex_init(&rf_sparet_wait_mutex);
    331 	if (rc) {
    332 		RF_PANIC();
    333 	}
    334 
    335 	rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
    336 
    337 	for (i = 0; i < num; i++)
    338 		raidPtrs[i] = NULL;
    339 	rc = rf_BootRaidframe();
    340 	if (rc == 0)
    341 		printf("Kernelized RAIDframe activated\n");
    342 	else
    343 		panic("Serious error booting RAID!!\n");
    344 
    345 	/* put together some datastructures like the CCD device does.. This
    346 	 * lets us lock the device and what-not when it gets opened. */
    347 
    348 	raid_softc = (struct raid_softc *)
    349 		malloc(num * sizeof(struct raid_softc),
    350 		       M_RAIDFRAME, M_NOWAIT);
    351 	if (raid_softc == NULL) {
    352 		printf("WARNING: no memory for RAIDframe driver\n");
    353 		return;
    354 	}
    355 
    356 	bzero(raid_softc, num * sizeof(struct raid_softc));
    357 
    358 	raidrootdev = (struct device *)malloc(num * sizeof(struct device),
    359 					      M_RAIDFRAME, M_NOWAIT);
    360 	if (raidrootdev == NULL) {
    361 		panic("No memory for RAIDframe driver!!?!?!\n");
    362 	}
    363 
    364 	for (raidID = 0; raidID < num; raidID++) {
    365 		BUFQ_INIT(&raid_softc[raidID].buf_queue);
    366 
    367 		raidrootdev[raidID].dv_class  = DV_DISK;
    368 		raidrootdev[raidID].dv_cfdata = NULL;
    369 		raidrootdev[raidID].dv_unit   = raidID;
    370 		raidrootdev[raidID].dv_parent = NULL;
    371 		raidrootdev[raidID].dv_flags  = 0;
    372 		sprintf(raidrootdev[raidID].dv_xname,"raid%d",raidID);
    373 
    374 		RF_Calloc(raidPtrs[raidID], 1, sizeof(RF_Raid_t),
    375 			  (RF_Raid_t *));
    376 		if (raidPtrs[raidID] == NULL) {
    377 			printf("WARNING: raidPtrs[%d] is NULL\n", raidID);
    378 			numraid = raidID;
    379 			return;
    380 		}
    381 	}
    382 
    383 if (raidautoconfig) {
    384 	/* 1. locate all RAID components on the system */
    385 
    386 #if DEBUG
    387 	printf("Searching for raid components...\n");
    388 #endif
    389 	ac_list = rf_find_raid_components();
    390 
    391 	/* 2. sort them into their respective sets */
    392 
    393 	config_sets = rf_create_auto_sets(ac_list);
    394 
    395 	/* 3. evaluate each set and configure the valid ones
    396 	   This gets done in rf_buildroothack() */
    397 
    398 	/* schedule the creation of the thread to do the
    399 	   "/ on RAID" stuff */
    400 
    401 	kthread_create(rf_buildroothack,config_sets);
    402 
    403 	/* 4. make sure we get our mud.. I mean root.. hooks in.. */
    404 	/* XXXX pick raid0 for now... and this should be only done
    405 	   if we find something that's bootable!!! */
    406 #if 0
    407 	mountroothook_establish(rf_mountroot_hook, &raidrootdev[0]);
    408 #endif
    409 }
    410 
    411 }
    412 
    413 void
    414 rf_buildroothack(arg)
    415 	void *arg;
    416 {
    417 	RF_ConfigSet_t *config_sets = arg;
    418 	RF_ConfigSet_t *cset;
    419 	RF_ConfigSet_t *next_cset;
    420 	int retcode;
    421 	int raidID;
    422 	int rootID;
    423 	int num_root;
    424 
    425 	num_root = 0;
    426 	cset = config_sets;
    427 	while(cset != NULL ) {
    428 		next_cset = cset->next;
    429 		if (rf_have_enough_components(cset) &&
    430 		    cset->ac->clabel->autoconfigure==1) {
    431 			retcode = rf_auto_config_set(cset,&raidID);
    432 			if (!retcode) {
    433 				if (cset->rootable) {
    434 					rootID = raidID;
    435 					num_root++;
    436 				}
    437 			} else {
    438 				/* The autoconfig didn't work :( */
    439 #if DEBUG
    440 				printf("Autoconfig failed with code %d for raid%d\n", retcode, raidID);
    441 #endif
    442 				rf_release_all_vps(cset);
    443 #if DEBUG
    444 				printf("Done cleanup\n");
    445 #endif
    446 			}
    447 		} else {
    448 			/* we're not autoconfiguring this set...
    449 			   release the associated resources */
    450 #if DEBUG
    451 			printf("Releasing vp's\n");
    452 #endif
    453 			rf_release_all_vps(cset);
    454 #if DEBUG
    455 			printf("Done.\n");
    456 #endif
    457 		}
    458 		/* cleanup */
    459 #if DEBUG
    460 		printf("Cleaning up config set\n");
    461 #endif
    462 		rf_cleanup_config_set(cset);
    463 #if DEBUG
    464 		printf("Done cleanup\n");
    465 #endif
    466 		cset = next_cset;
    467 	}
    468 	if (boothowto & RB_ASKNAME) {
    469 		/* We don't auto-config... */
    470 	} else {
    471 		/* They didn't ask, and we found something bootable... */
    472 		/* XXX pretend for now.. */
    473 		if (num_root == 1) {
    474 #if 1
    475 			booted_device = &raidrootdev[rootID];
    476 #endif
    477 		} else if (num_root > 1) {
    478 			/* we can't guess.. require the user to answer... */
    479 			boothowto |= RB_ASKNAME;
    480 		}
    481 	}
    482 }
    483 
    484 
    485 int
    486 raidsize(dev)
    487 	dev_t   dev;
    488 {
    489 	struct raid_softc *rs;
    490 	struct disklabel *lp;
    491 	int     part, unit, omask, size;
    492 
    493 	unit = raidunit(dev);
    494 	if (unit >= numraid)
    495 		return (-1);
    496 	rs = &raid_softc[unit];
    497 
    498 	if ((rs->sc_flags & RAIDF_INITED) == 0)
    499 		return (-1);
    500 
    501 	part = DISKPART(dev);
    502 	omask = rs->sc_dkdev.dk_openmask & (1 << part);
    503 	lp = rs->sc_dkdev.dk_label;
    504 
    505 	if (omask == 0 && raidopen(dev, 0, S_IFBLK, curproc))
    506 		return (-1);
    507 
    508 	if (lp->d_partitions[part].p_fstype != FS_SWAP)
    509 		size = -1;
    510 	else
    511 		size = lp->d_partitions[part].p_size *
    512 		    (lp->d_secsize / DEV_BSIZE);
    513 
    514 	if (omask == 0 && raidclose(dev, 0, S_IFBLK, curproc))
    515 		return (-1);
    516 
    517 	return (size);
    518 
    519 }
    520 
    521 int
    522 raiddump(dev, blkno, va, size)
    523 	dev_t   dev;
    524 	daddr_t blkno;
    525 	caddr_t va;
    526 	size_t  size;
    527 {
    528 	/* Not implemented. */
    529 	return ENXIO;
    530 }
    531 /* ARGSUSED */
    532 int
    533 raidopen(dev, flags, fmt, p)
    534 	dev_t   dev;
    535 	int     flags, fmt;
    536 	struct proc *p;
    537 {
    538 	int     unit = raidunit(dev);
    539 	struct raid_softc *rs;
    540 	struct disklabel *lp;
    541 	int     part, pmask;
    542 	int     error = 0;
    543 
    544 	if (unit >= numraid)
    545 		return (ENXIO);
    546 	rs = &raid_softc[unit];
    547 
    548 	if ((error = raidlock(rs)) != 0)
    549 		return (error);
    550 	lp = rs->sc_dkdev.dk_label;
    551 
    552 	part = DISKPART(dev);
    553 	pmask = (1 << part);
    554 
    555 	db1_printf(("Opening raid device number: %d partition: %d\n",
    556 		unit, part));
    557 
    558 
    559 	if ((rs->sc_flags & RAIDF_INITED) &&
    560 	    (rs->sc_dkdev.dk_openmask == 0))
    561 		raidgetdisklabel(dev);
    562 
    563 	/* make sure that this partition exists */
    564 
    565 	if (part != RAW_PART) {
    566 		db1_printf(("Not a raw partition..\n"));
    567 		if (((rs->sc_flags & RAIDF_INITED) == 0) ||
    568 		    ((part >= lp->d_npartitions) ||
    569 			(lp->d_partitions[part].p_fstype == FS_UNUSED))) {
    570 			error = ENXIO;
    571 			raidunlock(rs);
    572 			db1_printf(("Bailing out...\n"));
    573 			return (error);
    574 		}
    575 	}
    576 	/* Prevent this unit from being unconfigured while open. */
    577 	switch (fmt) {
    578 	case S_IFCHR:
    579 		rs->sc_dkdev.dk_copenmask |= pmask;
    580 		break;
    581 
    582 	case S_IFBLK:
    583 		rs->sc_dkdev.dk_bopenmask |= pmask;
    584 		break;
    585 	}
    586 
    587 	if ((rs->sc_dkdev.dk_openmask == 0) &&
    588 	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
    589 		/* First one... mark things as dirty... Note that we *MUST*
    590 		 have done a configure before this.  I DO NOT WANT TO BE
    591 		 SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
    592 		 THAT THEY BELONG TOGETHER!!!!! */
    593 		/* XXX should check to see if we're only open for reading
    594 		   here... If so, we needn't do this, but then need some
    595 		   other way of keeping track of what's happened.. */
    596 
    597 		rf_markalldirty( raidPtrs[unit] );
    598 	}
    599 
    600 
    601 	rs->sc_dkdev.dk_openmask =
    602 	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
    603 
    604 	raidunlock(rs);
    605 
    606 	return (error);
    607 
    608 
    609 }
    610 /* ARGSUSED */
    611 int
    612 raidclose(dev, flags, fmt, p)
    613 	dev_t   dev;
    614 	int     flags, fmt;
    615 	struct proc *p;
    616 {
    617 	int     unit = raidunit(dev);
    618 	struct raid_softc *rs;
    619 	int     error = 0;
    620 	int     part;
    621 
    622 	if (unit >= numraid)
    623 		return (ENXIO);
    624 	rs = &raid_softc[unit];
    625 
    626 	if ((error = raidlock(rs)) != 0)
    627 		return (error);
    628 
    629 	part = DISKPART(dev);
    630 
    631 	/* ...that much closer to allowing unconfiguration... */
    632 	switch (fmt) {
    633 	case S_IFCHR:
    634 		rs->sc_dkdev.dk_copenmask &= ~(1 << part);
    635 		break;
    636 
    637 	case S_IFBLK:
    638 		rs->sc_dkdev.dk_bopenmask &= ~(1 << part);
    639 		break;
    640 	}
    641 	rs->sc_dkdev.dk_openmask =
    642 	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
    643 
    644 	if ((rs->sc_dkdev.dk_openmask == 0) &&
    645 	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
    646 		/* Last one... device is not unconfigured yet.
    647 		   Device shutdown has taken care of setting the
    648 		   clean bits if RAIDF_INITED is not set
    649 		   mark things as clean... */
    650 #ifdef DEBUG
    651 		printf("Last one on raid%d.  Updating status.\n",unit);
    652 #endif
    653 		rf_update_component_labels( raidPtrs[unit] );
    654 	}
    655 
    656 	raidunlock(rs);
    657 	return (0);
    658 
    659 }
    660 
    661 void
    662 raidstrategy(bp)
    663 	register struct buf *bp;
    664 {
    665 	register int s;
    666 
    667 	unsigned int raidID = raidunit(bp->b_dev);
    668 	RF_Raid_t *raidPtr;
    669 	struct raid_softc *rs = &raid_softc[raidID];
    670 	struct disklabel *lp;
    671 	int     wlabel;
    672 
    673 	if ((rs->sc_flags & RAIDF_INITED) ==0) {
    674 		bp->b_error = ENXIO;
    675 		bp->b_flags = B_ERROR;
    676 		bp->b_resid = bp->b_bcount;
    677 		biodone(bp);
    678 		return;
    679 	}
    680 	if (raidID >= numraid || !raidPtrs[raidID]) {
    681 		bp->b_error = ENODEV;
    682 		bp->b_flags |= B_ERROR;
    683 		bp->b_resid = bp->b_bcount;
    684 		biodone(bp);
    685 		return;
    686 	}
    687 	raidPtr = raidPtrs[raidID];
    688 	if (!raidPtr->valid) {
    689 		bp->b_error = ENODEV;
    690 		bp->b_flags |= B_ERROR;
    691 		bp->b_resid = bp->b_bcount;
    692 		biodone(bp);
    693 		return;
    694 	}
    695 	if (bp->b_bcount == 0) {
    696 		db1_printf(("b_bcount is zero..\n"));
    697 		biodone(bp);
    698 		return;
    699 	}
    700 	lp = rs->sc_dkdev.dk_label;
    701 
    702 	/*
    703 	 * Do bounds checking and adjust transfer.  If there's an
    704 	 * error, the bounds check will flag that for us.
    705 	 */
    706 
    707 	wlabel = rs->sc_flags & (RAIDF_WLABEL | RAIDF_LABELLING);
    708 	if (DISKPART(bp->b_dev) != RAW_PART)
    709 		if (bounds_check_with_label(bp, lp, wlabel) <= 0) {
    710 			db1_printf(("Bounds check failed!!:%d %d\n",
    711 				(int) bp->b_blkno, (int) wlabel));
    712 			biodone(bp);
    713 			return;
    714 		}
    715 	s = splbio();
    716 
    717 	bp->b_resid = 0;
    718 
    719 	/* stuff it onto our queue */
    720 	BUFQ_INSERT_TAIL(&rs->buf_queue, bp);
    721 
    722 	raidstart(raidPtrs[raidID]);
    723 
    724 	splx(s);
    725 }
    726 /* ARGSUSED */
    727 int
    728 raidread(dev, uio, flags)
    729 	dev_t   dev;
    730 	struct uio *uio;
    731 	int     flags;
    732 {
    733 	int     unit = raidunit(dev);
    734 	struct raid_softc *rs;
    735 	int     part;
    736 
    737 	if (unit >= numraid)
    738 		return (ENXIO);
    739 	rs = &raid_softc[unit];
    740 
    741 	if ((rs->sc_flags & RAIDF_INITED) == 0)
    742 		return (ENXIO);
    743 	part = DISKPART(dev);
    744 
    745 	db1_printf(("raidread: unit: %d partition: %d\n", unit, part));
    746 
    747 	return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));
    748 
    749 }
    750 /* ARGSUSED */
    751 int
    752 raidwrite(dev, uio, flags)
    753 	dev_t   dev;
    754 	struct uio *uio;
    755 	int     flags;
    756 {
    757 	int     unit = raidunit(dev);
    758 	struct raid_softc *rs;
    759 
    760 	if (unit >= numraid)
    761 		return (ENXIO);
    762 	rs = &raid_softc[unit];
    763 
    764 	if ((rs->sc_flags & RAIDF_INITED) == 0)
    765 		return (ENXIO);
    766 	db1_printf(("raidwrite\n"));
    767 	return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));
    768 
    769 }
    770 
    771 int
    772 raidioctl(dev, cmd, data, flag, p)
    773 	dev_t   dev;
    774 	u_long  cmd;
    775 	caddr_t data;
    776 	int     flag;
    777 	struct proc *p;
    778 {
    779 	int     unit = raidunit(dev);
    780 	int     error = 0;
    781 	int     part, pmask;
    782 	struct raid_softc *rs;
    783 	RF_Config_t *k_cfg, *u_cfg;
    784 	RF_Raid_t *raidPtr;
    785 	RF_RaidDisk_t *diskPtr;
    786 	RF_AccTotals_t *totals;
    787 	RF_DeviceConfig_t *d_cfg, **ucfgp;
    788 	u_char *specific_buf;
    789 	int retcode = 0;
    790 	int row;
    791 	int column;
    792 	struct rf_recon_req *rrcopy, *rr;
    793 	RF_ComponentLabel_t *clabel;
    794 	RF_ComponentLabel_t ci_label;
    795 	RF_ComponentLabel_t **clabel_ptr;
    796 	RF_SingleComponent_t *sparePtr,*componentPtr;
    797 	RF_SingleComponent_t hot_spare;
    798 	RF_SingleComponent_t component;
    799 	int i, j, d;
    800 
    801 	if (unit >= numraid)
    802 		return (ENXIO);
    803 	rs = &raid_softc[unit];
    804 	raidPtr = raidPtrs[unit];
    805 
    806 	db1_printf(("raidioctl: %d %d %d %d\n", (int) dev,
    807 		(int) DISKPART(dev), (int) unit, (int) cmd));
    808 
    809 	/* Must be open for writes for these commands... */
    810 	switch (cmd) {
    811 	case DIOCSDINFO:
    812 	case DIOCWDINFO:
    813 	case DIOCWLABEL:
    814 		if ((flag & FWRITE) == 0)
    815 			return (EBADF);
    816 	}
    817 
    818 	/* Must be initialized for these... */
    819 	switch (cmd) {
    820 	case DIOCGDINFO:
    821 	case DIOCSDINFO:
    822 	case DIOCWDINFO:
    823 	case DIOCGPART:
    824 	case DIOCWLABEL:
    825 	case DIOCGDEFLABEL:
    826 	case RAIDFRAME_SHUTDOWN:
    827 	case RAIDFRAME_REWRITEPARITY:
    828 	case RAIDFRAME_GET_INFO:
    829 	case RAIDFRAME_RESET_ACCTOTALS:
    830 	case RAIDFRAME_GET_ACCTOTALS:
    831 	case RAIDFRAME_KEEP_ACCTOTALS:
    832 	case RAIDFRAME_GET_SIZE:
    833 	case RAIDFRAME_FAIL_DISK:
    834 	case RAIDFRAME_COPYBACK:
    835 	case RAIDFRAME_CHECK_RECON_STATUS:
    836 	case RAIDFRAME_GET_COMPONENT_LABEL:
    837 	case RAIDFRAME_SET_COMPONENT_LABEL:
    838 	case RAIDFRAME_ADD_HOT_SPARE:
    839 	case RAIDFRAME_REMOVE_HOT_SPARE:
    840 	case RAIDFRAME_INIT_LABELS:
    841 	case RAIDFRAME_REBUILD_IN_PLACE:
    842 	case RAIDFRAME_CHECK_PARITY:
    843 	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
    844 	case RAIDFRAME_CHECK_COPYBACK_STATUS:
    845 	case RAIDFRAME_SET_AUTOCONFIG:
    846 	case RAIDFRAME_SET_ROOT:
    847 		if ((rs->sc_flags & RAIDF_INITED) == 0)
    848 			return (ENXIO);
    849 	}
    850 
    851 	switch (cmd) {
    852 
    853 		/* configure the system */
    854 	case RAIDFRAME_CONFIGURE:
    855 
    856 		if (raidPtr->valid) {
    857 			/* There is a valid RAID set running on this unit! */
    858 			printf("raid%d: Device already configured!\n",unit);
    859 		}
    860 
    861 		/* copy-in the configuration information */
    862 		/* data points to a pointer to the configuration structure */
    863 
    864 		u_cfg = *((RF_Config_t **) data);
    865 		RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *));
    866 		if (k_cfg == NULL) {
    867 			return (ENOMEM);
    868 		}
    869 		retcode = copyin((caddr_t) u_cfg, (caddr_t) k_cfg,
    870 		    sizeof(RF_Config_t));
    871 		if (retcode) {
    872 			RF_Free(k_cfg, sizeof(RF_Config_t));
    873 			db1_printf(("rf_ioctl: retcode=%d copyin.1\n",
    874 				retcode));
    875 			return (retcode);
    876 		}
    877 		/* allocate a buffer for the layout-specific data, and copy it
    878 		 * in */
    879 		if (k_cfg->layoutSpecificSize) {
    880 			if (k_cfg->layoutSpecificSize > 10000) {
    881 				/* sanity check */
    882 				RF_Free(k_cfg, sizeof(RF_Config_t));
    883 				return (EINVAL);
    884 			}
    885 			RF_Malloc(specific_buf, k_cfg->layoutSpecificSize,
    886 			    (u_char *));
    887 			if (specific_buf == NULL) {
    888 				RF_Free(k_cfg, sizeof(RF_Config_t));
    889 				return (ENOMEM);
    890 			}
    891 			retcode = copyin(k_cfg->layoutSpecific,
    892 			    (caddr_t) specific_buf,
    893 			    k_cfg->layoutSpecificSize);
    894 			if (retcode) {
    895 				RF_Free(k_cfg, sizeof(RF_Config_t));
    896 				RF_Free(specific_buf,
    897 					k_cfg->layoutSpecificSize);
    898 				db1_printf(("rf_ioctl: retcode=%d copyin.2\n",
    899 					retcode));
    900 				return (retcode);
    901 			}
    902 		} else
    903 			specific_buf = NULL;
    904 		k_cfg->layoutSpecific = specific_buf;
    905 
    906 		/* should do some kind of sanity check on the configuration.
    907 		 * Store the sum of all the bytes in the last byte? */
    908 
    909 		/* configure the system */
    910 
    911 		/*
    912 		 * Clear the entire RAID descriptor, just to make sure
    913 		 *  there is no stale data left in the case of a
    914 		 *  reconfiguration
    915 		 */
    916 		bzero((char *) raidPtr, sizeof(RF_Raid_t));
    917 		raidPtr->raidid = unit;
    918 
    919 		retcode = rf_Configure(raidPtr, k_cfg, NULL);
    920 
    921 		if (retcode == 0) {
    922 
    923 			/* allow this many simultaneous IO's to
    924 			   this RAID device */
    925 			raidPtr->openings = RAIDOUTSTANDING;
    926 
    927 			raidinit(raidPtr);
    928 			rf_markalldirty(raidPtr);
    929 		}
    930 		/* free the buffers.  No return code here. */
    931 		if (k_cfg->layoutSpecificSize) {
    932 			RF_Free(specific_buf, k_cfg->layoutSpecificSize);
    933 		}
    934 		RF_Free(k_cfg, sizeof(RF_Config_t));
    935 
    936 		return (retcode);
    937 
    938 		/* shutdown the system */
    939 	case RAIDFRAME_SHUTDOWN:
    940 
    941 		if ((error = raidlock(rs)) != 0)
    942 			return (error);
    943 
    944 		/*
    945 		 * If somebody has a partition mounted, we shouldn't
    946 		 * shutdown.
    947 		 */
    948 
    949 		part = DISKPART(dev);
    950 		pmask = (1 << part);
    951 		if ((rs->sc_dkdev.dk_openmask & ~pmask) ||
    952 		    ((rs->sc_dkdev.dk_bopenmask & pmask) &&
    953 			(rs->sc_dkdev.dk_copenmask & pmask))) {
    954 			raidunlock(rs);
    955 			return (EBUSY);
    956 		}
    957 
    958 		retcode = rf_Shutdown(raidPtr);
    959 
    960 		pool_destroy(&rs->sc_cbufpool);
    961 
    962 		/* It's no longer initialized... */
    963 		rs->sc_flags &= ~RAIDF_INITED;
    964 
    965 		/* Detach the disk. */
    966 		disk_detach(&rs->sc_dkdev);
    967 
    968 		raidunlock(rs);
    969 
    970 		return (retcode);
    971 	case RAIDFRAME_GET_COMPONENT_LABEL:
    972 		clabel_ptr = (RF_ComponentLabel_t **) data;
    973 		/* need to read the component label for the disk indicated
    974 		   by row,column in clabel */
    975 
    976 		/* For practice, let's get it directly from disk, rather
    977 		   than from the in-core copy */
    978 		RF_Malloc( clabel, sizeof( RF_ComponentLabel_t ),
    979 			   (RF_ComponentLabel_t *));
    980 		if (clabel == NULL)
    981 			return (ENOMEM);
    982 
    983 		bzero((char *) clabel, sizeof(RF_ComponentLabel_t));
    984 
    985 		retcode = copyin( *clabel_ptr, clabel,
    986 				  sizeof(RF_ComponentLabel_t));
    987 
    988 		if (retcode) {
    989 			RF_Free( clabel, sizeof(RF_ComponentLabel_t));
    990 			return(retcode);
    991 		}
    992 
    993 		row = clabel->row;
    994 		column = clabel->column;
    995 
    996 		if ((row < 0) || (row >= raidPtr->numRow) ||
    997 		    (column < 0) || (column >= raidPtr->numCol)) {
    998 			RF_Free( clabel, sizeof(RF_ComponentLabel_t));
    999 			return(EINVAL);
   1000 		}
   1001 
   1002 		raidread_component_label(raidPtr->Disks[row][column].dev,
   1003 				raidPtr->raid_cinfo[row][column].ci_vp,
   1004 				clabel );
   1005 
   1006 		retcode = copyout((caddr_t) clabel,
   1007 				  (caddr_t) *clabel_ptr,
   1008 				  sizeof(RF_ComponentLabel_t));
   1009 		RF_Free( clabel, sizeof(RF_ComponentLabel_t));
   1010 		return (retcode);
   1011 
   1012 	case RAIDFRAME_SET_COMPONENT_LABEL:
   1013 		clabel = (RF_ComponentLabel_t *) data;
   1014 
   1015 		/* XXX check the label for valid stuff... */
   1016 		/* Note that some things *should not* get modified --
   1017 		   the user should be re-initing the labels instead of
   1018 		   trying to patch things.
   1019 		   */
   1020 
   1021 		printf("Got component label:\n");
   1022 		printf("Version: %d\n",clabel->version);
   1023 		printf("Serial Number: %d\n",clabel->serial_number);
   1024 		printf("Mod counter: %d\n",clabel->mod_counter);
   1025 		printf("Row: %d\n", clabel->row);
   1026 		printf("Column: %d\n", clabel->column);
   1027 		printf("Num Rows: %d\n", clabel->num_rows);
   1028 		printf("Num Columns: %d\n", clabel->num_columns);
   1029 		printf("Clean: %d\n", clabel->clean);
   1030 		printf("Status: %d\n", clabel->status);
   1031 
   1032 		row = clabel->row;
   1033 		column = clabel->column;
   1034 
   1035 		if ((row < 0) || (row >= raidPtr->numRow) ||
   1036 		    (column < 0) || (column >= raidPtr->numCol)) {
   1037 			return(EINVAL);
   1038 		}
   1039 
   1040 		/* XXX this isn't allowed to do anything for now :-) */
   1041 
   1042 		/* XXX and before it is, we need to fill in the rest
   1043 		   of the fields!?!?!?! */
   1044 #if 0
   1045 		raidwrite_component_label(
   1046                             raidPtr->Disks[row][column].dev,
   1047 			    raidPtr->raid_cinfo[row][column].ci_vp,
   1048 			    clabel );
   1049 #endif
   1050 		return (0);
   1051 
   1052 	case RAIDFRAME_INIT_LABELS:
   1053 		clabel = (RF_ComponentLabel_t *) data;
   1054 		/*
   1055 		   we only want the serial number from
   1056 		   the above.  We get all the rest of the information
   1057 		   from the config that was used to create this RAID
   1058 		   set.
   1059 		   */
   1060 
   1061 		raidPtr->serial_number = clabel->serial_number;
   1062 
   1063 		raid_init_component_label(raidPtr, &ci_label);
   1064 		ci_label.serial_number = clabel->serial_number;
   1065 
   1066 		for(row=0;row<raidPtr->numRow;row++) {
   1067 			ci_label.row = row;
   1068 			for(column=0;column<raidPtr->numCol;column++) {
   1069 				diskPtr = &raidPtr->Disks[row][column];
   1070 				ci_label.partitionSize = diskPtr->partitionSize;
   1071 				ci_label.column = column;
   1072 				raidwrite_component_label(
   1073 				  raidPtr->Disks[row][column].dev,
   1074 				  raidPtr->raid_cinfo[row][column].ci_vp,
   1075 				  &ci_label );
   1076 			}
   1077 		}
   1078 
   1079 		return (retcode);
   1080 	case RAIDFRAME_SET_AUTOCONFIG:
   1081 		d = rf_set_autoconfig(raidPtr, *data);
   1082 		printf("New autoconfig value is: %d\n", d);
   1083 		*data = d;
   1084 		return (retcode);
   1085 
   1086 	case RAIDFRAME_SET_ROOT:
   1087 		d = rf_set_rootpartition(raidPtr, *data);
   1088 		printf("New rootpartition value is: %d\n", d);
   1089 		*data = d;
   1090 		return (retcode);
   1091 
   1092 		/* initialize all parity */
   1093 	case RAIDFRAME_REWRITEPARITY:
   1094 
   1095 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1096 			/* Parity for RAID 0 is trivially correct */
   1097 			raidPtr->parity_good = RF_RAID_CLEAN;
   1098 			return(0);
   1099 		}
   1100 
   1101 		if (raidPtr->parity_rewrite_in_progress == 1) {
   1102 			/* Re-write is already in progress! */
   1103 			return(EINVAL);
   1104 		}
   1105 
   1106 		retcode = RF_CREATE_THREAD(raidPtr->parity_rewrite_thread,
   1107 					   rf_RewriteParityThread,
   1108 					   raidPtr,"raid_parity");
   1109 		return (retcode);
   1110 
   1111 
   1112 	case RAIDFRAME_ADD_HOT_SPARE:
   1113 		sparePtr = (RF_SingleComponent_t *) data;
   1114 		memcpy( &hot_spare, sparePtr, sizeof(RF_SingleComponent_t));
   1115 		printf("Adding spare\n");
   1116 		retcode = rf_add_hot_spare(raidPtr, &hot_spare);
   1117 		return(retcode);
   1118 
   1119 	case RAIDFRAME_REMOVE_HOT_SPARE:
   1120 		return(retcode);
   1121 
   1122 	case RAIDFRAME_REBUILD_IN_PLACE:
   1123 
   1124 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1125 			/* Can't do this on a RAID 0!! */
   1126 			return(EINVAL);
   1127 		}
   1128 
   1129 		if (raidPtr->recon_in_progress == 1) {
   1130 			/* a reconstruct is already in progress! */
   1131 			return(EINVAL);
   1132 		}
   1133 
   1134 		componentPtr = (RF_SingleComponent_t *) data;
   1135 		memcpy( &component, componentPtr,
   1136 			sizeof(RF_SingleComponent_t));
   1137 		row = component.row;
   1138 		column = component.column;
   1139 		printf("Rebuild: %d %d\n",row, column);
   1140 		if ((row < 0) || (row >= raidPtr->numRow) ||
   1141 		    (column < 0) || (column >= raidPtr->numCol)) {
   1142 			return(EINVAL);
   1143 		}
   1144 
   1145 		RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
   1146 		if (rrcopy == NULL)
   1147 			return(ENOMEM);
   1148 
   1149 		rrcopy->raidPtr = (void *) raidPtr;
   1150 		rrcopy->row = row;
   1151 		rrcopy->col = column;
   1152 
   1153 		retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
   1154 					   rf_ReconstructInPlaceThread,
   1155 					   rrcopy,"raid_reconip");
   1156 		return(retcode);
   1157 
   1158 	case RAIDFRAME_GET_INFO:
   1159 		if (!raidPtr->valid)
   1160 			return (ENODEV);
   1161 		ucfgp = (RF_DeviceConfig_t **) data;
   1162 		RF_Malloc(d_cfg, sizeof(RF_DeviceConfig_t),
   1163 			  (RF_DeviceConfig_t *));
   1164 		if (d_cfg == NULL)
   1165 			return (ENOMEM);
   1166 		bzero((char *) d_cfg, sizeof(RF_DeviceConfig_t));
   1167 		d_cfg->rows = raidPtr->numRow;
   1168 		d_cfg->cols = raidPtr->numCol;
   1169 		d_cfg->ndevs = raidPtr->numRow * raidPtr->numCol;
   1170 		if (d_cfg->ndevs >= RF_MAX_DISKS) {
   1171 			RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
   1172 			return (ENOMEM);
   1173 		}
   1174 		d_cfg->nspares = raidPtr->numSpare;
   1175 		if (d_cfg->nspares >= RF_MAX_DISKS) {
   1176 			RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
   1177 			return (ENOMEM);
   1178 		}
   1179 		d_cfg->maxqdepth = raidPtr->maxQueueDepth;
   1180 		d = 0;
   1181 		for (i = 0; i < d_cfg->rows; i++) {
   1182 			for (j = 0; j < d_cfg->cols; j++) {
   1183 				d_cfg->devs[d] = raidPtr->Disks[i][j];
   1184 				d++;
   1185 			}
   1186 		}
   1187 		for (j = d_cfg->cols, i = 0; i < d_cfg->nspares; i++, j++) {
   1188 			d_cfg->spares[i] = raidPtr->Disks[0][j];
   1189 		}
   1190 		retcode = copyout((caddr_t) d_cfg, (caddr_t) * ucfgp,
   1191 				  sizeof(RF_DeviceConfig_t));
   1192 		RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
   1193 
   1194 		return (retcode);
   1195 
   1196 	case RAIDFRAME_CHECK_PARITY:
   1197 		*(int *) data = raidPtr->parity_good;
   1198 		return (0);
   1199 
   1200 	case RAIDFRAME_RESET_ACCTOTALS:
   1201 		bzero(&raidPtr->acc_totals, sizeof(raidPtr->acc_totals));
   1202 		return (0);
   1203 
   1204 	case RAIDFRAME_GET_ACCTOTALS:
   1205 		totals = (RF_AccTotals_t *) data;
   1206 		*totals = raidPtr->acc_totals;
   1207 		return (0);
   1208 
   1209 	case RAIDFRAME_KEEP_ACCTOTALS:
   1210 		raidPtr->keep_acc_totals = *(int *)data;
   1211 		return (0);
   1212 
   1213 	case RAIDFRAME_GET_SIZE:
   1214 		*(int *) data = raidPtr->totalSectors;
   1215 		return (0);
   1216 
   1217 		/* fail a disk & optionally start reconstruction */
   1218 	case RAIDFRAME_FAIL_DISK:
   1219 
   1220 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1221 			/* Can't do this on a RAID 0!! */
   1222 			return(EINVAL);
   1223 		}
   1224 
   1225 		rr = (struct rf_recon_req *) data;
   1226 
   1227 		if (rr->row < 0 || rr->row >= raidPtr->numRow
   1228 		    || rr->col < 0 || rr->col >= raidPtr->numCol)
   1229 			return (EINVAL);
   1230 
   1231 		printf("raid%d: Failing the disk: row: %d col: %d\n",
   1232 		       unit, rr->row, rr->col);
   1233 
   1234 		/* make a copy of the recon request so that we don't rely on
   1235 		 * the user's buffer */
   1236 		RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
   1237 		if (rrcopy == NULL)
   1238 			return(ENOMEM);
   1239 		bcopy(rr, rrcopy, sizeof(*rr));
   1240 		rrcopy->raidPtr = (void *) raidPtr;
   1241 
   1242 		retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
   1243 					   rf_ReconThread,
   1244 					   rrcopy,"raid_recon");
   1245 		return (0);
   1246 
   1247 		/* invoke a copyback operation after recon on whatever disk
   1248 		 * needs it, if any */
   1249 	case RAIDFRAME_COPYBACK:
   1250 
   1251 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1252 			/* This makes no sense on a RAID 0!! */
   1253 			return(EINVAL);
   1254 		}
   1255 
   1256 		if (raidPtr->copyback_in_progress == 1) {
   1257 			/* Copyback is already in progress! */
   1258 			return(EINVAL);
   1259 		}
   1260 
   1261 		retcode = RF_CREATE_THREAD(raidPtr->copyback_thread,
   1262 					   rf_CopybackThread,
   1263 					   raidPtr,"raid_copyback");
   1264 		return (retcode);
   1265 
   1266 		/* return the percentage completion of reconstruction */
   1267 	case RAIDFRAME_CHECK_RECON_STATUS:
   1268 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1269 			/* This makes no sense on a RAID 0 */
   1270 			return(EINVAL);
   1271 		}
   1272 		row = 0; /* XXX we only consider a single row... */
   1273 		if (raidPtr->status[row] != rf_rs_reconstructing)
   1274 			*(int *) data = 100;
   1275 		else
   1276 			*(int *) data = raidPtr->reconControl[row]->percentComplete;
   1277 		return (0);
   1278 
   1279 	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
   1280 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1281 			/* This makes no sense on a RAID 0 */
   1282 			return(EINVAL);
   1283 		}
   1284 		if (raidPtr->parity_rewrite_in_progress == 1) {
   1285 			*(int *) data = 100 * raidPtr->parity_rewrite_stripes_done / raidPtr->Layout.numStripe;
   1286 		} else {
   1287 			*(int *) data = 100;
   1288 		}
   1289 		return (0);
   1290 
   1291 	case RAIDFRAME_CHECK_COPYBACK_STATUS:
   1292 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1293 			/* This makes no sense on a RAID 0 */
   1294 			return(EINVAL);
   1295 		}
   1296 		if (raidPtr->copyback_in_progress == 1) {
   1297 			*(int *) data = 100 * raidPtr->copyback_stripes_done /
   1298 				raidPtr->Layout.numStripe;
   1299 		} else {
   1300 			*(int *) data = 100;
   1301 		}
   1302 		return (0);
   1303 
   1304 
   1305 		/* the sparetable daemon calls this to wait for the kernel to
   1306 		 * need a spare table. this ioctl does not return until a
   1307 		 * spare table is needed. XXX -- calling mpsleep here in the
   1308 		 * ioctl code is almost certainly wrong and evil. -- XXX XXX
   1309 		 * -- I should either compute the spare table in the kernel,
   1310 		 * or have a different -- XXX XXX -- interface (a different
   1311 		 * character device) for delivering the table     -- XXX */
   1312 #if 0
   1313 	case RAIDFRAME_SPARET_WAIT:
   1314 		RF_LOCK_MUTEX(rf_sparet_wait_mutex);
   1315 		while (!rf_sparet_wait_queue)
   1316 			mpsleep(&rf_sparet_wait_queue, (PZERO + 1) | PCATCH, "sparet wait", 0, (void *) simple_lock_addr(rf_sparet_wait_mutex), MS_LOCK_SIMPLE);
   1317 		waitreq = rf_sparet_wait_queue;
   1318 		rf_sparet_wait_queue = rf_sparet_wait_queue->next;
   1319 		RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
   1320 
   1321 		/* structure assignment */
   1322 		*((RF_SparetWait_t *) data) = *waitreq;
   1323 
   1324 		RF_Free(waitreq, sizeof(*waitreq));
   1325 		return (0);
   1326 
   1327 		/* wakes up a process waiting on SPARET_WAIT and puts an error
   1328 		 * code in it that will cause the daemon to exit */
   1329 	case RAIDFRAME_ABORT_SPARET_WAIT:
   1330 		RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
   1331 		waitreq->fcol = -1;
   1332 		RF_LOCK_MUTEX(rf_sparet_wait_mutex);
   1333 		waitreq->next = rf_sparet_wait_queue;
   1334 		rf_sparet_wait_queue = waitreq;
   1335 		RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
   1336 		wakeup(&rf_sparet_wait_queue);
   1337 		return (0);
   1338 
   1339 		/* used by the spare table daemon to deliver a spare table
   1340 		 * into the kernel */
   1341 	case RAIDFRAME_SEND_SPARET:
   1342 
   1343 		/* install the spare table */
   1344 		retcode = rf_SetSpareTable(raidPtr, *(void **) data);
   1345 
   1346 		/* respond to the requestor.  the return status of the spare
   1347 		 * table installation is passed in the "fcol" field */
   1348 		RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
   1349 		waitreq->fcol = retcode;
   1350 		RF_LOCK_MUTEX(rf_sparet_wait_mutex);
   1351 		waitreq->next = rf_sparet_resp_queue;
   1352 		rf_sparet_resp_queue = waitreq;
   1353 		wakeup(&rf_sparet_resp_queue);
   1354 		RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
   1355 
   1356 		return (retcode);
   1357 #endif
   1358 
   1359 	default:
   1360 		break; /* fall through to the os-specific code below */
   1361 
   1362 	}
   1363 
   1364 	if (!raidPtr->valid)
   1365 		return (EINVAL);
   1366 
   1367 	/*
   1368 	 * Add support for "regular" device ioctls here.
   1369 	 */
   1370 
   1371 	switch (cmd) {
   1372 	case DIOCGDINFO:
   1373 		*(struct disklabel *) data = *(rs->sc_dkdev.dk_label);
   1374 		break;
   1375 
   1376 	case DIOCGPART:
   1377 		((struct partinfo *) data)->disklab = rs->sc_dkdev.dk_label;
   1378 		((struct partinfo *) data)->part =
   1379 		    &rs->sc_dkdev.dk_label->d_partitions[DISKPART(dev)];
   1380 		break;
   1381 
   1382 	case DIOCWDINFO:
   1383 	case DIOCSDINFO:
   1384 		if ((error = raidlock(rs)) != 0)
   1385 			return (error);
   1386 
   1387 		rs->sc_flags |= RAIDF_LABELLING;
   1388 
   1389 		error = setdisklabel(rs->sc_dkdev.dk_label,
   1390 		    (struct disklabel *) data, 0, rs->sc_dkdev.dk_cpulabel);
   1391 		if (error == 0) {
   1392 			if (cmd == DIOCWDINFO)
   1393 				error = writedisklabel(RAIDLABELDEV(dev),
   1394 				    raidstrategy, rs->sc_dkdev.dk_label,
   1395 				    rs->sc_dkdev.dk_cpulabel);
   1396 		}
   1397 		rs->sc_flags &= ~RAIDF_LABELLING;
   1398 
   1399 		raidunlock(rs);
   1400 
   1401 		if (error)
   1402 			return (error);
   1403 		break;
   1404 
   1405 	case DIOCWLABEL:
   1406 		if (*(int *) data != 0)
   1407 			rs->sc_flags |= RAIDF_WLABEL;
   1408 		else
   1409 			rs->sc_flags &= ~RAIDF_WLABEL;
   1410 		break;
   1411 
   1412 	case DIOCGDEFLABEL:
   1413 		raidgetdefaultlabel(raidPtr, rs,
   1414 		    (struct disklabel *) data);
   1415 		break;
   1416 
   1417 	default:
   1418 		retcode = ENOTTY;
   1419 	}
   1420 	return (retcode);
   1421 
   1422 }
   1423 
   1424 
   1425 /* raidinit -- complete the rest of the initialization for the
   1426    RAIDframe device.  */
   1427 
   1428 
   1429 static void
   1430 raidinit(raidPtr)
   1431 	RF_Raid_t *raidPtr;
   1432 {
   1433 	struct raid_softc *rs;
   1434 	int     unit;
   1435 
   1436 	unit = raidPtr->raidid;
   1437 
   1438 	rs = &raid_softc[unit];
   1439 	pool_init(&rs->sc_cbufpool, sizeof(struct raidbuf), 0,
   1440 		  0, 0, "raidpl", 0, NULL, NULL, M_RAIDFRAME);
   1441 
   1442 
   1443 	/* XXX should check return code first... */
   1444 	rs->sc_flags |= RAIDF_INITED;
   1445 
   1446 	sprintf(rs->sc_xname, "raid%d", unit);	/* XXX doesn't check bounds. */
   1447 
   1448 	rs->sc_dkdev.dk_name = rs->sc_xname;
   1449 
   1450 	/* disk_attach actually creates space for the CPU disklabel, among
   1451 	 * other things, so it's critical to call this *BEFORE* we try putzing
   1452 	 * with disklabels. */
   1453 
   1454 	disk_attach(&rs->sc_dkdev);
   1455 
   1456 	/* XXX There may be a weird interaction here between this, and
   1457 	 * protectedSectors, as used in RAIDframe.  */
   1458 
   1459 	rs->sc_size = raidPtr->totalSectors;
   1460 
   1461 }
   1462 
   1463 /* wake up the daemon & tell it to get us a spare table
   1464  * XXX
   1465  * the entries in the queues should be tagged with the raidPtr
   1466  * so that in the extremely rare case that two recons happen at once,
   1467  * we know for which device were requesting a spare table
   1468  * XXX
   1469  *
   1470  * XXX This code is not currently used. GO
   1471  */
   1472 int
   1473 rf_GetSpareTableFromDaemon(req)
   1474 	RF_SparetWait_t *req;
   1475 {
   1476 	int     retcode;
   1477 
   1478 	RF_LOCK_MUTEX(rf_sparet_wait_mutex);
   1479 	req->next = rf_sparet_wait_queue;
   1480 	rf_sparet_wait_queue = req;
   1481 	wakeup(&rf_sparet_wait_queue);
   1482 
   1483 	/* mpsleep unlocks the mutex */
   1484 	while (!rf_sparet_resp_queue) {
   1485 		tsleep(&rf_sparet_resp_queue, PRIBIO,
   1486 		    "raidframe getsparetable", 0);
   1487 	}
   1488 	req = rf_sparet_resp_queue;
   1489 	rf_sparet_resp_queue = req->next;
   1490 	RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
   1491 
   1492 	retcode = req->fcol;
   1493 	RF_Free(req, sizeof(*req));	/* this is not the same req as we
   1494 					 * alloc'd */
   1495 	return (retcode);
   1496 }
   1497 
   1498 /* a wrapper around rf_DoAccess that extracts appropriate info from the
   1499  * bp & passes it down.
   1500  * any calls originating in the kernel must use non-blocking I/O
   1501  * do some extra sanity checking to return "appropriate" error values for
   1502  * certain conditions (to make some standard utilities work)
   1503  *
   1504  * Formerly known as: rf_DoAccessKernel
   1505  */
   1506 void
   1507 raidstart(raidPtr)
   1508 	RF_Raid_t *raidPtr;
   1509 {
   1510 	RF_SectorCount_t num_blocks, pb, sum;
   1511 	RF_RaidAddr_t raid_addr;
   1512 	int     retcode;
   1513 	struct partition *pp;
   1514 	daddr_t blocknum;
   1515 	int     unit;
   1516 	struct raid_softc *rs;
   1517 	int     do_async;
   1518 	struct buf *bp;
   1519 
   1520 	unit = raidPtr->raidid;
   1521 	rs = &raid_softc[unit];
   1522 
   1523 	/* quick check to see if anything has died recently */
   1524 	RF_LOCK_MUTEX(raidPtr->mutex);
   1525 	if (raidPtr->numNewFailures > 0) {
   1526 		rf_update_component_labels(raidPtr);
   1527 		raidPtr->numNewFailures--;
   1528 	}
   1529 	RF_UNLOCK_MUTEX(raidPtr->mutex);
   1530 
   1531 	/* Check to see if we're at the limit... */
   1532 	RF_LOCK_MUTEX(raidPtr->mutex);
   1533 	while (raidPtr->openings > 0) {
   1534 		RF_UNLOCK_MUTEX(raidPtr->mutex);
   1535 
   1536 		/* get the next item, if any, from the queue */
   1537 		if ((bp = BUFQ_FIRST(&rs->buf_queue)) == NULL) {
   1538 			/* nothing more to do */
   1539 			return;
   1540 		}
   1541 		BUFQ_REMOVE(&rs->buf_queue, bp);
   1542 
   1543 		/* Ok, for the bp we have here, bp->b_blkno is relative to the
   1544 		 * partition.. Need to make it absolute to the underlying
   1545 		 * device.. */
   1546 
   1547 		blocknum = bp->b_blkno;
   1548 		if (DISKPART(bp->b_dev) != RAW_PART) {
   1549 			pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)];
   1550 			blocknum += pp->p_offset;
   1551 		}
   1552 
   1553 		db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno,
   1554 			    (int) blocknum));
   1555 
   1556 		db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
   1557 		db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));
   1558 
   1559 		/* *THIS* is where we adjust what block we're going to...
   1560 		 * but DO NOT TOUCH bp->b_blkno!!! */
   1561 		raid_addr = blocknum;
   1562 
   1563 		num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
   1564 		pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
   1565 		sum = raid_addr + num_blocks + pb;
   1566 		if (1 || rf_debugKernelAccess) {
   1567 			db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
   1568 				    (int) raid_addr, (int) sum, (int) num_blocks,
   1569 				    (int) pb, (int) bp->b_resid));
   1570 		}
   1571 		if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
   1572 		    || (sum < num_blocks) || (sum < pb)) {
   1573 			bp->b_error = ENOSPC;
   1574 			bp->b_flags |= B_ERROR;
   1575 			bp->b_resid = bp->b_bcount;
   1576 			biodone(bp);
   1577 			RF_LOCK_MUTEX(raidPtr->mutex);
   1578 			continue;
   1579 		}
   1580 		/*
   1581 		 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
   1582 		 */
   1583 
   1584 		if (bp->b_bcount & raidPtr->sectorMask) {
   1585 			bp->b_error = EINVAL;
   1586 			bp->b_flags |= B_ERROR;
   1587 			bp->b_resid = bp->b_bcount;
   1588 			biodone(bp);
   1589 			RF_LOCK_MUTEX(raidPtr->mutex);
   1590 			continue;
   1591 
   1592 		}
   1593 		db1_printf(("Calling DoAccess..\n"));
   1594 
   1595 
   1596 		RF_LOCK_MUTEX(raidPtr->mutex);
   1597 		raidPtr->openings--;
   1598 		RF_UNLOCK_MUTEX(raidPtr->mutex);
   1599 
   1600 		/*
   1601 		 * Everything is async.
   1602 		 */
   1603 		do_async = 1;
   1604 
   1605 		/* don't ever condition on bp->b_flags & B_WRITE.
   1606 		 * always condition on B_READ instead */
   1607 
   1608 		/* XXX we're still at splbio() here... do we *really*
   1609 		   need to be? */
   1610 
   1611 
   1612 		retcode = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
   1613 				      RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
   1614 				      do_async, raid_addr, num_blocks,
   1615 				      bp->b_un.b_addr, bp, NULL, NULL,
   1616 				      RF_DAG_NONBLOCKING_IO, NULL, NULL, NULL);
   1617 
   1618 
   1619 		RF_LOCK_MUTEX(raidPtr->mutex);
   1620 	}
   1621 	RF_UNLOCK_MUTEX(raidPtr->mutex);
   1622 }
   1623 
   1624 
   1625 
   1626 
   1627 /* invoke an I/O from kernel mode.  Disk queue should be locked upon entry */
   1628 
   1629 int
   1630 rf_DispatchKernelIO(queue, req)
   1631 	RF_DiskQueue_t *queue;
   1632 	RF_DiskQueueData_t *req;
   1633 {
   1634 	int     op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
   1635 	struct buf *bp;
   1636 	struct raidbuf *raidbp = NULL;
   1637 	struct raid_softc *rs;
   1638 	int     unit;
   1639 	int s;
   1640 
   1641 	s=0;
   1642 	/* s = splbio();*/ /* want to test this */
   1643 	/* XXX along with the vnode, we also need the softc associated with
   1644 	 * this device.. */
   1645 
   1646 	req->queue = queue;
   1647 
   1648 	unit = queue->raidPtr->raidid;
   1649 
   1650 	db1_printf(("DispatchKernelIO unit: %d\n", unit));
   1651 
   1652 	if (unit >= numraid) {
   1653 		printf("Invalid unit number: %d %d\n", unit, numraid);
   1654 		panic("Invalid Unit number in rf_DispatchKernelIO\n");
   1655 	}
   1656 	rs = &raid_softc[unit];
   1657 
   1658 	/* XXX is this the right place? */
   1659 	disk_busy(&rs->sc_dkdev);
   1660 
   1661 	bp = req->bp;
   1662 #if 1
   1663 	/* XXX when there is a physical disk failure, someone is passing us a
   1664 	 * buffer that contains old stuff!!  Attempt to deal with this problem
   1665 	 * without taking a performance hit... (not sure where the real bug
   1666 	 * is.  It's buried in RAIDframe somewhere) :-(  GO ) */
   1667 
   1668 	if (bp->b_flags & B_ERROR) {
   1669 		bp->b_flags &= ~B_ERROR;
   1670 	}
   1671 	if (bp->b_error != 0) {
   1672 		bp->b_error = 0;
   1673 	}
   1674 #endif
   1675 	raidbp = RAIDGETBUF(rs);
   1676 
   1677 	raidbp->rf_flags = 0;	/* XXX not really used anywhere... */
   1678 
   1679 	/*
   1680 	 * context for raidiodone
   1681 	 */
   1682 	raidbp->rf_obp = bp;
   1683 	raidbp->req = req;
   1684 
   1685 	LIST_INIT(&raidbp->rf_buf.b_dep);
   1686 
   1687 	switch (req->type) {
   1688 	case RF_IO_TYPE_NOP:	/* used primarily to unlock a locked queue */
   1689 		/* XXX need to do something extra here.. */
   1690 		/* I'm leaving this in, as I've never actually seen it used,
   1691 		 * and I'd like folks to report it... GO */
   1692 		printf(("WAKEUP CALLED\n"));
   1693 		queue->numOutstanding++;
   1694 
   1695 		/* XXX need to glue the original buffer into this??  */
   1696 
   1697 		KernelWakeupFunc(&raidbp->rf_buf);
   1698 		break;
   1699 
   1700 	case RF_IO_TYPE_READ:
   1701 	case RF_IO_TYPE_WRITE:
   1702 
   1703 		if (req->tracerec) {
   1704 			RF_ETIMER_START(req->tracerec->timer);
   1705 		}
   1706 		InitBP(&raidbp->rf_buf, queue->rf_cinfo->ci_vp,
   1707 		    op | bp->b_flags, queue->rf_cinfo->ci_dev,
   1708 		    req->sectorOffset, req->numSector,
   1709 		    req->buf, KernelWakeupFunc, (void *) req,
   1710 		    queue->raidPtr->logBytesPerSector, req->b_proc);
   1711 
   1712 		if (rf_debugKernelAccess) {
   1713 			db1_printf(("dispatch: bp->b_blkno = %ld\n",
   1714 				(long) bp->b_blkno));
   1715 		}
   1716 		queue->numOutstanding++;
   1717 		queue->last_deq_sector = req->sectorOffset;
   1718 		/* acc wouldn't have been let in if there were any pending
   1719 		 * reqs at any other priority */
   1720 		queue->curPriority = req->priority;
   1721 
   1722 		db1_printf(("Going for %c to unit %d row %d col %d\n",
   1723 			req->type, unit, queue->row, queue->col));
   1724 		db1_printf(("sector %d count %d (%d bytes) %d\n",
   1725 			(int) req->sectorOffset, (int) req->numSector,
   1726 			(int) (req->numSector <<
   1727 			    queue->raidPtr->logBytesPerSector),
   1728 			(int) queue->raidPtr->logBytesPerSector));
   1729 		if ((raidbp->rf_buf.b_flags & B_READ) == 0) {
   1730 			raidbp->rf_buf.b_vp->v_numoutput++;
   1731 		}
   1732 		VOP_STRATEGY(&raidbp->rf_buf);
   1733 
   1734 		break;
   1735 
   1736 	default:
   1737 		panic("bad req->type in rf_DispatchKernelIO");
   1738 	}
   1739 	db1_printf(("Exiting from DispatchKernelIO\n"));
   1740 	/* splx(s); */ /* want to test this */
   1741 	return (0);
   1742 }
   1743 /* this is the callback function associated with a I/O invoked from
   1744    kernel code.
   1745  */
   1746 static void
   1747 KernelWakeupFunc(vbp)
   1748 	struct buf *vbp;
   1749 {
   1750 	RF_DiskQueueData_t *req = NULL;
   1751 	RF_DiskQueue_t *queue;
   1752 	struct raidbuf *raidbp = (struct raidbuf *) vbp;
   1753 	struct buf *bp;
   1754 	struct raid_softc *rs;
   1755 	int     unit;
   1756 	register int s;
   1757 
   1758 	s = splbio();
   1759 	db1_printf(("recovering the request queue:\n"));
   1760 	req = raidbp->req;
   1761 
   1762 	bp = raidbp->rf_obp;
   1763 
   1764 	queue = (RF_DiskQueue_t *) req->queue;
   1765 
   1766 	if (raidbp->rf_buf.b_flags & B_ERROR) {
   1767 		bp->b_flags |= B_ERROR;
   1768 		bp->b_error = raidbp->rf_buf.b_error ?
   1769 		    raidbp->rf_buf.b_error : EIO;
   1770 	}
   1771 
   1772 	/* XXX methinks this could be wrong... */
   1773 #if 1
   1774 	bp->b_resid = raidbp->rf_buf.b_resid;
   1775 #endif
   1776 
   1777 	if (req->tracerec) {
   1778 		RF_ETIMER_STOP(req->tracerec->timer);
   1779 		RF_ETIMER_EVAL(req->tracerec->timer);
   1780 		RF_LOCK_MUTEX(rf_tracing_mutex);
   1781 		req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
   1782 		req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
   1783 		req->tracerec->num_phys_ios++;
   1784 		RF_UNLOCK_MUTEX(rf_tracing_mutex);
   1785 	}
   1786 	bp->b_bcount = raidbp->rf_buf.b_bcount;	/* XXXX ?? */
   1787 
   1788 	unit = queue->raidPtr->raidid;	/* *Much* simpler :-> */
   1789 
   1790 
   1791 	/* XXX Ok, let's get aggressive... If B_ERROR is set, let's go
   1792 	 * ballistic, and mark the component as hosed... */
   1793 
   1794 	if (bp->b_flags & B_ERROR) {
   1795 		/* Mark the disk as dead */
   1796 		/* but only mark it once... */
   1797 		if (queue->raidPtr->Disks[queue->row][queue->col].status ==
   1798 		    rf_ds_optimal) {
   1799 			printf("raid%d: IO Error.  Marking %s as failed.\n",
   1800 			    unit, queue->raidPtr->Disks[queue->row][queue->col].devname);
   1801 			queue->raidPtr->Disks[queue->row][queue->col].status =
   1802 			    rf_ds_failed;
   1803 			queue->raidPtr->status[queue->row] = rf_rs_degraded;
   1804 			queue->raidPtr->numFailures++;
   1805 			queue->raidPtr->numNewFailures++;
   1806 			/* XXX here we should bump the version number for each component, and write that data out */
   1807 		} else {	/* Disk is already dead... */
   1808 			/* printf("Disk already marked as dead!\n"); */
   1809 		}
   1810 
   1811 	}
   1812 
   1813 	rs = &raid_softc[unit];
   1814 	RAIDPUTBUF(rs, raidbp);
   1815 
   1816 
   1817 	if (bp->b_resid == 0) {
   1818 		/* XXX is this the right place for a disk_unbusy()??!??!?!? */
   1819 		disk_unbusy(&rs->sc_dkdev, (bp->b_bcount - bp->b_resid));
   1820 	}
   1821 
   1822 	rf_DiskIOComplete(queue, req, (bp->b_flags & B_ERROR) ? 1 : 0);
   1823 	(req->CompleteFunc) (req->argument, (bp->b_flags & B_ERROR) ? 1 : 0);
   1824 
   1825 	splx(s);
   1826 }
   1827 
   1828 
   1829 
   1830 /*
   1831  * initialize a buf structure for doing an I/O in the kernel.
   1832  */
   1833 static void
   1834 InitBP(
   1835     struct buf * bp,
   1836     struct vnode * b_vp,
   1837     unsigned rw_flag,
   1838     dev_t dev,
   1839     RF_SectorNum_t startSect,
   1840     RF_SectorCount_t numSect,
   1841     caddr_t buf,
   1842     void (*cbFunc) (struct buf *),
   1843     void *cbArg,
   1844     int logBytesPerSector,
   1845     struct proc * b_proc)
   1846 {
   1847 	/* bp->b_flags       = B_PHYS | rw_flag; */
   1848 	bp->b_flags = B_CALL | rw_flag;	/* XXX need B_PHYS here too??? */
   1849 	bp->b_bcount = numSect << logBytesPerSector;
   1850 	bp->b_bufsize = bp->b_bcount;
   1851 	bp->b_error = 0;
   1852 	bp->b_dev = dev;
   1853 	bp->b_un.b_addr = buf;
   1854 	bp->b_blkno = startSect;
   1855 	bp->b_resid = bp->b_bcount;	/* XXX is this right!??!?!! */
   1856 	if (bp->b_bcount == 0) {
   1857 		panic("bp->b_bcount is zero in InitBP!!\n");
   1858 	}
   1859 	bp->b_proc = b_proc;
   1860 	bp->b_iodone = cbFunc;
   1861 	bp->b_vp = b_vp;
   1862 
   1863 }
   1864 
   1865 static void
   1866 raidgetdefaultlabel(raidPtr, rs, lp)
   1867 	RF_Raid_t *raidPtr;
   1868 	struct raid_softc *rs;
   1869 	struct disklabel *lp;
   1870 {
   1871 	db1_printf(("Building a default label...\n"));
   1872 	bzero(lp, sizeof(*lp));
   1873 
   1874 	/* fabricate a label... */
   1875 	lp->d_secperunit = raidPtr->totalSectors;
   1876 	lp->d_secsize = raidPtr->bytesPerSector;
   1877 	lp->d_nsectors = raidPtr->Layout.dataSectorsPerStripe;
   1878 	lp->d_ntracks = 1;
   1879 	lp->d_ncylinders = raidPtr->totalSectors /
   1880 		(lp->d_nsectors * lp->d_ntracks);
   1881 	lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
   1882 
   1883 	strncpy(lp->d_typename, "raid", sizeof(lp->d_typename));
   1884 	lp->d_type = DTYPE_RAID;
   1885 	strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
   1886 	lp->d_rpm = 3600;
   1887 	lp->d_interleave = 1;
   1888 	lp->d_flags = 0;
   1889 
   1890 	lp->d_partitions[RAW_PART].p_offset = 0;
   1891 	lp->d_partitions[RAW_PART].p_size = raidPtr->totalSectors;
   1892 	lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
   1893 	lp->d_npartitions = RAW_PART + 1;
   1894 
   1895 	lp->d_magic = DISKMAGIC;
   1896 	lp->d_magic2 = DISKMAGIC;
   1897 	lp->d_checksum = dkcksum(rs->sc_dkdev.dk_label);
   1898 
   1899 }
   1900 /*
   1901  * Read the disklabel from the raid device.  If one is not present, fake one
   1902  * up.
   1903  */
   1904 static void
   1905 raidgetdisklabel(dev)
   1906 	dev_t   dev;
   1907 {
   1908 	int     unit = raidunit(dev);
   1909 	struct raid_softc *rs = &raid_softc[unit];
   1910 	char   *errstring;
   1911 	struct disklabel *lp = rs->sc_dkdev.dk_label;
   1912 	struct cpu_disklabel *clp = rs->sc_dkdev.dk_cpulabel;
   1913 	RF_Raid_t *raidPtr;
   1914 
   1915 	db1_printf(("Getting the disklabel...\n"));
   1916 
   1917 	bzero(clp, sizeof(*clp));
   1918 
   1919 	raidPtr = raidPtrs[unit];
   1920 
   1921 	raidgetdefaultlabel(raidPtr, rs, lp);
   1922 
   1923 	/*
   1924 	 * Call the generic disklabel extraction routine.
   1925 	 */
   1926 	errstring = readdisklabel(RAIDLABELDEV(dev), raidstrategy,
   1927 	    rs->sc_dkdev.dk_label, rs->sc_dkdev.dk_cpulabel);
   1928 	if (errstring)
   1929 		raidmakedisklabel(rs);
   1930 	else {
   1931 		int     i;
   1932 		struct partition *pp;
   1933 
   1934 		/*
   1935 		 * Sanity check whether the found disklabel is valid.
   1936 		 *
   1937 		 * This is necessary since total size of the raid device
   1938 		 * may vary when an interleave is changed even though exactly
   1939 		 * same componets are used, and old disklabel may used
   1940 		 * if that is found.
   1941 		 */
   1942 		if (lp->d_secperunit != rs->sc_size)
   1943 			printf("WARNING: %s: "
   1944 			    "total sector size in disklabel (%d) != "
   1945 			    "the size of raid (%ld)\n", rs->sc_xname,
   1946 			    lp->d_secperunit, (long) rs->sc_size);
   1947 		for (i = 0; i < lp->d_npartitions; i++) {
   1948 			pp = &lp->d_partitions[i];
   1949 			if (pp->p_offset + pp->p_size > rs->sc_size)
   1950 				printf("WARNING: %s: end of partition `%c' "
   1951 				    "exceeds the size of raid (%ld)\n",
   1952 				    rs->sc_xname, 'a' + i, (long) rs->sc_size);
   1953 		}
   1954 	}
   1955 
   1956 }
   1957 /*
   1958  * Take care of things one might want to take care of in the event
   1959  * that a disklabel isn't present.
   1960  */
   1961 static void
   1962 raidmakedisklabel(rs)
   1963 	struct raid_softc *rs;
   1964 {
   1965 	struct disklabel *lp = rs->sc_dkdev.dk_label;
   1966 	db1_printf(("Making a label..\n"));
   1967 
   1968 	/*
   1969 	 * For historical reasons, if there's no disklabel present
   1970 	 * the raw partition must be marked FS_BSDFFS.
   1971 	 */
   1972 
   1973 	lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;
   1974 
   1975 	strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));
   1976 
   1977 	lp->d_checksum = dkcksum(lp);
   1978 }
   1979 /*
   1980  * Lookup the provided name in the filesystem.  If the file exists,
   1981  * is a valid block device, and isn't being used by anyone else,
   1982  * set *vpp to the file's vnode.
   1983  * You'll find the original of this in ccd.c
   1984  */
   1985 int
   1986 raidlookup(path, p, vpp)
   1987 	char   *path;
   1988 	struct proc *p;
   1989 	struct vnode **vpp;	/* result */
   1990 {
   1991 	struct nameidata nd;
   1992 	struct vnode *vp;
   1993 	struct vattr va;
   1994 	int     error;
   1995 
   1996 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, path, p);
   1997 	if ((error = vn_open(&nd, FREAD | FWRITE, 0)) != 0) {
   1998 #ifdef DEBUG
   1999 		printf("RAIDframe: vn_open returned %d\n", error);
   2000 #endif
   2001 		return (error);
   2002 	}
   2003 	vp = nd.ni_vp;
   2004 	if (vp->v_usecount > 1) {
   2005 		VOP_UNLOCK(vp, 0);
   2006 		(void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
   2007 		return (EBUSY);
   2008 	}
   2009 	if ((error = VOP_GETATTR(vp, &va, p->p_ucred, p)) != 0) {
   2010 		VOP_UNLOCK(vp, 0);
   2011 		(void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
   2012 		return (error);
   2013 	}
   2014 	/* XXX: eventually we should handle VREG, too. */
   2015 	if (va.va_type != VBLK) {
   2016 		VOP_UNLOCK(vp, 0);
   2017 		(void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
   2018 		return (ENOTBLK);
   2019 	}
   2020 	VOP_UNLOCK(vp, 0);
   2021 	*vpp = vp;
   2022 	return (0);
   2023 }
   2024 /*
   2025  * Wait interruptibly for an exclusive lock.
   2026  *
   2027  * XXX
   2028  * Several drivers do this; it should be abstracted and made MP-safe.
   2029  * (Hmm... where have we seen this warning before :->  GO )
   2030  */
   2031 static int
   2032 raidlock(rs)
   2033 	struct raid_softc *rs;
   2034 {
   2035 	int     error;
   2036 
   2037 	while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
   2038 		rs->sc_flags |= RAIDF_WANTED;
   2039 		if ((error =
   2040 			tsleep(rs, PRIBIO | PCATCH, "raidlck", 0)) != 0)
   2041 			return (error);
   2042 	}
   2043 	rs->sc_flags |= RAIDF_LOCKED;
   2044 	return (0);
   2045 }
   2046 /*
   2047  * Unlock and wake up any waiters.
   2048  */
   2049 static void
   2050 raidunlock(rs)
   2051 	struct raid_softc *rs;
   2052 {
   2053 
   2054 	rs->sc_flags &= ~RAIDF_LOCKED;
   2055 	if ((rs->sc_flags & RAIDF_WANTED) != 0) {
   2056 		rs->sc_flags &= ~RAIDF_WANTED;
   2057 		wakeup(rs);
   2058 	}
   2059 }
   2060 
   2061 
   2062 #define RF_COMPONENT_INFO_OFFSET  16384 /* bytes */
   2063 #define RF_COMPONENT_INFO_SIZE     1024 /* bytes */
   2064 
   2065 int
   2066 raidmarkclean(dev_t dev, struct vnode *b_vp, int mod_counter)
   2067 {
   2068 	RF_ComponentLabel_t clabel;
   2069 	raidread_component_label(dev, b_vp, &clabel);
   2070 	clabel.mod_counter = mod_counter;
   2071 	clabel.clean = RF_RAID_CLEAN;
   2072 	raidwrite_component_label(dev, b_vp, &clabel);
   2073 	return(0);
   2074 }
   2075 
   2076 
   2077 int
   2078 raidmarkdirty(dev_t dev, struct vnode *b_vp, int mod_counter)
   2079 {
   2080 	RF_ComponentLabel_t clabel;
   2081 	raidread_component_label(dev, b_vp, &clabel);
   2082 	clabel.mod_counter = mod_counter;
   2083 	clabel.clean = RF_RAID_DIRTY;
   2084 	raidwrite_component_label(dev, b_vp, &clabel);
   2085 	return(0);
   2086 }
   2087 
   2088 /* ARGSUSED */
   2089 int
   2090 raidread_component_label(dev, b_vp, clabel)
   2091 	dev_t dev;
   2092 	struct vnode *b_vp;
   2093 	RF_ComponentLabel_t *clabel;
   2094 {
   2095 	struct buf *bp;
   2096 	int error;
   2097 
   2098 	/* XXX should probably ensure that we don't try to do this if
   2099 	   someone has changed rf_protected_sectors. */
   2100 
   2101 	/* get a block of the appropriate size... */
   2102 	bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
   2103 	bp->b_dev = dev;
   2104 
   2105 	/* get our ducks in a row for the read */
   2106 	bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
   2107 	bp->b_bcount = RF_COMPONENT_INFO_SIZE;
   2108 	bp->b_flags = B_BUSY | B_READ;
   2109  	bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
   2110 
   2111 	(*bdevsw[major(bp->b_dev)].d_strategy)(bp);
   2112 
   2113 	error = biowait(bp);
   2114 
   2115 	if (!error) {
   2116 		memcpy(clabel, bp->b_un.b_addr,
   2117 		       sizeof(RF_ComponentLabel_t));
   2118 #if 0
   2119 		print_component_label( clabel );
   2120 #endif
   2121         } else {
   2122 #if 0
   2123 		printf("Failed to read RAID component label!\n");
   2124 #endif
   2125 	}
   2126 
   2127         bp->b_flags = B_INVAL | B_AGE;
   2128 	brelse(bp);
   2129 	return(error);
   2130 }
   2131 /* ARGSUSED */
   2132 int
   2133 raidwrite_component_label(dev, b_vp, clabel)
   2134 	dev_t dev;
   2135 	struct vnode *b_vp;
   2136 	RF_ComponentLabel_t *clabel;
   2137 {
   2138 	struct buf *bp;
   2139 	int error;
   2140 
   2141 	/* get a block of the appropriate size... */
   2142 	bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
   2143 	bp->b_dev = dev;
   2144 
   2145 	/* get our ducks in a row for the write */
   2146 	bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
   2147 	bp->b_bcount = RF_COMPONENT_INFO_SIZE;
   2148 	bp->b_flags = B_BUSY | B_WRITE;
   2149  	bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
   2150 
   2151 	memset( bp->b_un.b_addr, 0, RF_COMPONENT_INFO_SIZE );
   2152 
   2153 	memcpy( bp->b_un.b_addr, clabel, sizeof(RF_ComponentLabel_t));
   2154 
   2155 	(*bdevsw[major(bp->b_dev)].d_strategy)(bp);
   2156 	error = biowait(bp);
   2157         bp->b_flags = B_INVAL | B_AGE;
   2158 	brelse(bp);
   2159 	if (error) {
   2160 #if 1
   2161 		printf("Failed to write RAID component info!\n");
   2162 #endif
   2163 	}
   2164 
   2165 	return(error);
   2166 }
   2167 
   2168 void
   2169 rf_markalldirty( raidPtr )
   2170 	RF_Raid_t *raidPtr;
   2171 {
   2172 	RF_ComponentLabel_t clabel;
   2173 	int r,c;
   2174 
   2175 	raidPtr->mod_counter++;
   2176 	for (r = 0; r < raidPtr->numRow; r++) {
   2177 		for (c = 0; c < raidPtr->numCol; c++) {
   2178 			if (raidPtr->Disks[r][c].status != rf_ds_failed) {
   2179 				raidread_component_label(
   2180 					raidPtr->Disks[r][c].dev,
   2181 					raidPtr->raid_cinfo[r][c].ci_vp,
   2182 					&clabel);
   2183 				if (clabel.status == rf_ds_spared) {
   2184 					/* XXX do something special...
   2185 					 but whatever you do, don't
   2186 					 try to access it!! */
   2187 				} else {
   2188 #if 0
   2189 				clabel.status =
   2190 					raidPtr->Disks[r][c].status;
   2191 				raidwrite_component_label(
   2192 					raidPtr->Disks[r][c].dev,
   2193 					raidPtr->raid_cinfo[r][c].ci_vp,
   2194 					&clabel);
   2195 #endif
   2196 				raidmarkdirty(
   2197 				       raidPtr->Disks[r][c].dev,
   2198 				       raidPtr->raid_cinfo[r][c].ci_vp,
   2199 				       raidPtr->mod_counter);
   2200 				}
   2201 			}
   2202 		}
   2203 	}
   2204 	/* printf("Component labels marked dirty.\n"); */
   2205 #if 0
   2206 	for( c = 0; c < raidPtr->numSpare ; c++) {
   2207 		sparecol = raidPtr->numCol + c;
   2208 		if (raidPtr->Disks[r][sparecol].status == rf_ds_used_spare) {
   2209 			/*
   2210 
   2211 			   XXX this is where we get fancy and map this spare
   2212 			   into it's correct spot in the array.
   2213 
   2214 			 */
   2215 			/*
   2216 
   2217 			   we claim this disk is "optimal" if it's
   2218 			   rf_ds_used_spare, as that means it should be
   2219 			   directly substitutable for the disk it replaced.
   2220 			   We note that too...
   2221 
   2222 			 */
   2223 
   2224 			for(i=0;i<raidPtr->numRow;i++) {
   2225 				for(j=0;j<raidPtr->numCol;j++) {
   2226 					if ((raidPtr->Disks[i][j].spareRow ==
   2227 					     r) &&
   2228 					    (raidPtr->Disks[i][j].spareCol ==
   2229 					     sparecol)) {
   2230 						srow = r;
   2231 						scol = sparecol;
   2232 						break;
   2233 					}
   2234 				}
   2235 			}
   2236 
   2237 			raidread_component_label(
   2238 				      raidPtr->Disks[r][sparecol].dev,
   2239 				      raidPtr->raid_cinfo[r][sparecol].ci_vp,
   2240 				      &clabel);
   2241 			/* make sure status is noted */
   2242 			clabel.version = RF_COMPONENT_LABEL_VERSION;
   2243 			clabel.mod_counter = raidPtr->mod_counter;
   2244 			clabel.serial_number = raidPtr->serial_number;
   2245 			clabel.row = srow;
   2246 			clabel.column = scol;
   2247 			clabel.num_rows = raidPtr->numRow;
   2248 			clabel.num_columns = raidPtr->numCol;
   2249 			clabel.clean = RF_RAID_DIRTY; /* changed in a bit*/
   2250 			clabel.status = rf_ds_optimal;
   2251 			raidwrite_component_label(
   2252 				      raidPtr->Disks[r][sparecol].dev,
   2253 				      raidPtr->raid_cinfo[r][sparecol].ci_vp,
   2254 				      &clabel);
   2255 			raidmarkclean( raidPtr->Disks[r][sparecol].dev,
   2256 			              raidPtr->raid_cinfo[r][sparecol].ci_vp);
   2257 		}
   2258 	}
   2259 
   2260 #endif
   2261 }
   2262 
   2263 
   2264 void
   2265 rf_update_component_labels( raidPtr )
   2266 	RF_Raid_t *raidPtr;
   2267 {
   2268 	RF_ComponentLabel_t clabel;
   2269 	int sparecol;
   2270 	int r,c;
   2271 	int i,j;
   2272 	int srow, scol;
   2273 
   2274 	srow = -1;
   2275 	scol = -1;
   2276 
   2277 	/* XXX should do extra checks to make sure things really are clean,
   2278 	   rather than blindly setting the clean bit... */
   2279 
   2280 	raidPtr->mod_counter++;
   2281 
   2282 	for (r = 0; r < raidPtr->numRow; r++) {
   2283 		for (c = 0; c < raidPtr->numCol; c++) {
   2284 			if (raidPtr->Disks[r][c].status == rf_ds_optimal) {
   2285 				raidread_component_label(
   2286 					raidPtr->Disks[r][c].dev,
   2287 					raidPtr->raid_cinfo[r][c].ci_vp,
   2288 					&clabel);
   2289 				/* make sure status is noted */
   2290 				clabel.status = rf_ds_optimal;
   2291 				/* bump the counter */
   2292 #if 0
   2293 				clabel.mod_counter++;
   2294 #endif
   2295 				clabel.mod_counter = raidPtr->mod_counter;
   2296 #if 0
   2297 				/* note where this set was configured last */
   2298 				clabel.last_unit = raidPtr->raidid;
   2299 #endif
   2300 #if DEBUG
   2301 				if (raidPtr->mod_counter !=
   2302 				    clabel.mod_counter) {
   2303 					printf("raid%d: mod_counter for row: %d col: %d not in sync\n", raidPtr->raidid, r, c);
   2304 				}
   2305 #endif
   2306 
   2307 				raidwrite_component_label(
   2308 					raidPtr->Disks[r][c].dev,
   2309 					raidPtr->raid_cinfo[r][c].ci_vp,
   2310 					&clabel);
   2311 				if (raidPtr->parity_good == RF_RAID_CLEAN) {
   2312 					raidmarkclean(
   2313 					      raidPtr->Disks[r][c].dev,
   2314 					      raidPtr->raid_cinfo[r][c].ci_vp,
   2315 					      raidPtr->mod_counter);
   2316 				}
   2317 			}
   2318 			/* else we don't touch it.. */
   2319 #if 0
   2320 			else if (raidPtr->Disks[r][c].status !=
   2321 				   rf_ds_failed) {
   2322 				raidread_component_label(
   2323 					raidPtr->Disks[r][c].dev,
   2324 					raidPtr->raid_cinfo[r][c].ci_vp,
   2325 					&clabel);
   2326 				/* make sure status is noted */
   2327 				clabel.status =
   2328 					raidPtr->Disks[r][c].status;
   2329 				raidwrite_component_label(
   2330 					raidPtr->Disks[r][c].dev,
   2331 					raidPtr->raid_cinfo[r][c].ci_vp,
   2332 					&clabel);
   2333 				if (raidPtr->parity_good == RF_RAID_CLEAN) {
   2334 					raidmarkclean(
   2335 					      raidPtr->Disks[r][c].dev,
   2336 					      raidPtr->raid_cinfo[r][c].ci_vp,
   2337 					      raidPtr->mod_counter);
   2338 				}
   2339 			}
   2340 #endif
   2341 		}
   2342 	}
   2343 
   2344 	for( c = 0; c < raidPtr->numSpare ; c++) {
   2345 		sparecol = raidPtr->numCol + c;
   2346 		if (raidPtr->Disks[0][sparecol].status == rf_ds_used_spare) {
   2347 			/*
   2348 
   2349 			   we claim this disk is "optimal" if it's
   2350 			   rf_ds_used_spare, as that means it should be
   2351 			   directly substitutable for the disk it replaced.
   2352 			   We note that too...
   2353 
   2354 			 */
   2355 
   2356 			for(i=0;i<raidPtr->numRow;i++) {
   2357 				for(j=0;j<raidPtr->numCol;j++) {
   2358 					if ((raidPtr->Disks[i][j].spareRow ==
   2359 					     0) &&
   2360 					    (raidPtr->Disks[i][j].spareCol ==
   2361 					     sparecol)) {
   2362 						srow = i;
   2363 						scol = j;
   2364 						break;
   2365 					}
   2366 				}
   2367 			}
   2368 
   2369 			/* XXX shouldn't *really* need this... */
   2370 			raidread_component_label(
   2371 				      raidPtr->Disks[0][sparecol].dev,
   2372 				      raidPtr->raid_cinfo[0][sparecol].ci_vp,
   2373 				      &clabel);
   2374 			/* make sure status is noted */
   2375 
   2376 			raid_init_component_label(raidPtr, &clabel);
   2377 
   2378 			clabel.mod_counter = raidPtr->mod_counter;
   2379 			clabel.row = srow;
   2380 			clabel.column = scol;
   2381 			clabel.status = rf_ds_optimal;
   2382 
   2383 			raidwrite_component_label(
   2384 				      raidPtr->Disks[0][sparecol].dev,
   2385 				      raidPtr->raid_cinfo[0][sparecol].ci_vp,
   2386 				      &clabel);
   2387 			if (raidPtr->parity_good == RF_RAID_CLEAN) {
   2388 				raidmarkclean( raidPtr->Disks[0][sparecol].dev,
   2389 			              raidPtr->raid_cinfo[0][sparecol].ci_vp,
   2390 					       raidPtr->mod_counter);
   2391 			}
   2392 		}
   2393 	}
   2394 	/* 	printf("Component labels updated\n"); */
   2395 }
   2396 
   2397 void
   2398 rf_ReconThread(req)
   2399 	struct rf_recon_req *req;
   2400 {
   2401 	int     s;
   2402 	RF_Raid_t *raidPtr;
   2403 
   2404 	s = splbio();
   2405 	raidPtr = (RF_Raid_t *) req->raidPtr;
   2406 	raidPtr->recon_in_progress = 1;
   2407 
   2408 	rf_FailDisk((RF_Raid_t *) req->raidPtr, req->row, req->col,
   2409 		    ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));
   2410 
   2411 	/* XXX get rid of this! we don't need it at all.. */
   2412 	RF_Free(req, sizeof(*req));
   2413 
   2414 	raidPtr->recon_in_progress = 0;
   2415 	splx(s);
   2416 
   2417 	/* That's all... */
   2418 	kthread_exit(0);        /* does not return */
   2419 }
   2420 
   2421 void
   2422 rf_RewriteParityThread(raidPtr)
   2423 	RF_Raid_t *raidPtr;
   2424 {
   2425 	int retcode;
   2426 	int s;
   2427 
   2428 	raidPtr->parity_rewrite_in_progress = 1;
   2429 	s = splbio();
   2430 	retcode = rf_RewriteParity(raidPtr);
   2431 	splx(s);
   2432 	if (retcode) {
   2433 		printf("raid%d: Error re-writing parity!\n",raidPtr->raidid);
   2434 	} else {
   2435 		/* set the clean bit!  If we shutdown correctly,
   2436 		   the clean bit on each component label will get
   2437 		   set */
   2438 		raidPtr->parity_good = RF_RAID_CLEAN;
   2439 	}
   2440 	raidPtr->parity_rewrite_in_progress = 0;
   2441 
   2442 	/* That's all... */
   2443 	kthread_exit(0);        /* does not return */
   2444 }
   2445 
   2446 
   2447 void
   2448 rf_CopybackThread(raidPtr)
   2449 	RF_Raid_t *raidPtr;
   2450 {
   2451 	int s;
   2452 
   2453 	raidPtr->copyback_in_progress = 1;
   2454 	s = splbio();
   2455 	rf_CopybackReconstructedData(raidPtr);
   2456 	splx(s);
   2457 	raidPtr->copyback_in_progress = 0;
   2458 
   2459 	/* That's all... */
   2460 	kthread_exit(0);        /* does not return */
   2461 }
   2462 
   2463 
   2464 void
   2465 rf_ReconstructInPlaceThread(req)
   2466 	struct rf_recon_req *req;
   2467 {
   2468 	int retcode;
   2469 	int s;
   2470 	RF_Raid_t *raidPtr;
   2471 
   2472 	s = splbio();
   2473 	raidPtr = req->raidPtr;
   2474 	raidPtr->recon_in_progress = 1;
   2475 	retcode = rf_ReconstructInPlace(raidPtr, req->row, req->col);
   2476 	RF_Free(req, sizeof(*req));
   2477 	raidPtr->recon_in_progress = 0;
   2478 	splx(s);
   2479 
   2480 	/* That's all... */
   2481 	kthread_exit(0);        /* does not return */
   2482 }
   2483 
/*
 * Mount-root hook for RAID devices.  Currently a placeholder: `dev' is
 * ignored and nothing is done.
 */
void
rf_mountroot_hook(dev)
	struct device *dev;
{
	/* nothing to do (yet) */
	return;
}
   2490 
   2491 
   2492 RF_AutoConfig_t *
   2493 rf_find_raid_components()
   2494 {
   2495 	struct devnametobdevmaj *dtobdm;
   2496 	struct vnode *vp;
   2497 	struct disklabel label;
   2498 	struct device *dv;
   2499 	char *cd_name;
   2500 	dev_t dev;
   2501 	int error;
   2502 	int i;
   2503 	int good_one;
   2504 	RF_ComponentLabel_t *clabel;
   2505 	RF_AutoConfig_t *ac_list;
   2506 	RF_AutoConfig_t *ac;
   2507 
   2508 
   2509 	/* initialize the AutoConfig list */
   2510 	ac_list = NULL;
   2511 
   2512 if (raidautoconfig) {
   2513 
   2514 	/* we begin by trolling through *all* the devices on the system */
   2515 
   2516 	for (dv = alldevs.tqh_first; dv != NULL;
   2517 	     dv = dv->dv_list.tqe_next) {
   2518 
   2519 		/* we are only interested in disks... */
   2520 		if (dv->dv_class != DV_DISK)
   2521 			continue;
   2522 
   2523 		/* we don't care about floppies... */
   2524 		if (!strcmp(dv->dv_cfdata->cf_driver->cd_name,"fd")) {
   2525 			continue;
   2526 		}
   2527 
   2528 		/* need to find the device_name_to_block_device_major stuff */
   2529 		cd_name = dv->dv_cfdata->cf_driver->cd_name;
   2530 		dtobdm = dev_name2blk;
   2531 		while (dtobdm->d_name && strcmp(dtobdm->d_name, cd_name)) {
   2532 			dtobdm++;
   2533 		}
   2534 
   2535 		/* get a vnode for the raw partition of this disk */
   2536 
   2537 		dev = MAKEDISKDEV(dtobdm->d_maj, dv->dv_unit, RAW_PART);
   2538 		if (bdevvp(dev, &vp))
   2539 			panic("RAID can't alloc vnode");
   2540 
   2541 		error = VOP_OPEN(vp, FREAD, NOCRED, 0);
   2542 
   2543 		if (error) {
   2544 			/* "Who cares."  Continue looking
   2545 			   for something that exists*/
   2546 			vput(vp);
   2547 			continue;
   2548 		}
   2549 
   2550 		/* Ok, the disk exists.  Go get the disklabel. */
   2551 		error = VOP_IOCTL(vp, DIOCGDINFO, (caddr_t)&label,
   2552 				  FREAD, NOCRED, 0);
   2553 		if (error) {
   2554 			/*
   2555 			 * XXX can't happen - open() would
   2556 			 * have errored out (or faked up one)
   2557 			 */
   2558 			printf("can't get label for dev %s%c (%d)!?!?\n",
   2559 			       dv->dv_xname, 'a' + RAW_PART, error);
   2560 		}
   2561 
   2562 		/* don't need this any more.  We'll allocate it again
   2563 		   a little later if we really do... */
   2564 		VOP_CLOSE(vp, FREAD, NOCRED, 0);
   2565 		vput(vp);
   2566 
   2567 		for (i=0; i < label.d_npartitions; i++) {
   2568 			/* We only support partitions marked as RAID */
   2569 			if (label.d_partitions[i].p_fstype != FS_RAID)
   2570 				continue;
   2571 
   2572 			dev = MAKEDISKDEV(dtobdm->d_maj, dv->dv_unit, i);
   2573 			if (bdevvp(dev, &vp))
   2574 				panic("RAID can't alloc vnode");
   2575 
   2576 			error = VOP_OPEN(vp, FREAD, NOCRED, 0);
   2577 			if (error) {
   2578 				/* Whatever... */
   2579 				vput(vp);
   2580 				continue;
   2581 			}
   2582 
   2583 			good_one = 0;
   2584 
   2585 			clabel = (RF_ComponentLabel_t *)
   2586 				malloc(sizeof(RF_ComponentLabel_t),
   2587 				       M_RAIDFRAME, M_NOWAIT);
   2588 			if (clabel == NULL) {
   2589 				/* XXX CLEANUP HERE */
   2590 				printf("RAID auto config: out of memory!\n");
   2591 				return(NULL); /* XXX probably should panic? */
   2592 			}
   2593 
   2594 			if (!raidread_component_label(dev, vp, clabel)) {
   2595 				/* Got the label.  Does it look reasonable? */
   2596 				if (rf_reasonable_label(clabel) &&
   2597 				    (clabel->partitionSize <=
   2598 				     label.d_partitions[i].p_size)) {
   2599 #if DEBUG
   2600 					printf("Component on: %s%c: %d\n",
   2601 					       dv->dv_xname, 'a'+i,
   2602 					       label.d_partitions[i].p_size);
   2603 					print_component_label(clabel);
   2604 #endif
   2605 					/* if it's reasonable, add it,
   2606 					   else ignore it. */
   2607 					ac = (RF_AutoConfig_t *)
   2608 						malloc(sizeof(RF_AutoConfig_t),
   2609 						       M_RAIDFRAME,
   2610 						       M_NOWAIT);
   2611 					if (ac == NULL) {
   2612 						/* XXX should panic?? */
   2613 						return(NULL);
   2614 					}
   2615 
   2616 					sprintf(ac->devname, "%s%c",
   2617 						dv->dv_xname, 'a'+i);
   2618 					ac->dev = dev;
   2619 					ac->vp = vp;
   2620 					ac->clabel = clabel;
   2621 					ac->next = ac_list;
   2622 					ac_list = ac;
   2623 					good_one = 1;
   2624 				}
   2625 			}
   2626 			if (!good_one) {
   2627 				/* cleanup */
   2628 				free(clabel, M_RAIDFRAME);
   2629 				VOP_CLOSE(vp, FREAD, NOCRED, 0);
   2630 				vput(vp);
   2631 			}
   2632 		}
   2633 	}
   2634 }
   2635 return(ac_list);
   2636 }
   2637 
   2638 static int
   2639 rf_reasonable_label(clabel)
   2640 	RF_ComponentLabel_t *clabel;
   2641 {
   2642 
   2643 	if (((clabel->version==RF_COMPONENT_LABEL_VERSION_1) ||
   2644 	     (clabel->version==RF_COMPONENT_LABEL_VERSION)) &&
   2645 	    ((clabel->clean == RF_RAID_CLEAN) ||
   2646 	     (clabel->clean == RF_RAID_DIRTY)) &&
   2647 	    clabel->row >=0 &&
   2648 	    clabel->column >= 0 &&
   2649 	    clabel->num_rows > 0 &&
   2650 	    clabel->num_columns > 0 &&
   2651 	    clabel->row < clabel->num_rows &&
   2652 	    clabel->column < clabel->num_columns &&
   2653 	    clabel->blockSize > 0 &&
   2654 	    clabel->numBlocks > 0) {
   2655 		/* label looks reasonable enough... */
   2656 		return(1);
   2657 	}
   2658 	return(0);
   2659 }
   2660 
   2661 
   2662 void
   2663 print_component_label(clabel)
   2664 	RF_ComponentLabel_t *clabel;
   2665 {
   2666 	printf("   Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
   2667 	       clabel->row, clabel->column,
   2668 	       clabel->num_rows, clabel->num_columns);
   2669 	printf("   Version: %d Serial Number: %d Mod Counter: %d\n",
   2670 	       clabel->version, clabel->serial_number,
   2671 	       clabel->mod_counter);
   2672 	printf("   Clean: %s Status: %d\n",
   2673 	       clabel->clean ? "Yes" : "No", clabel->status );
   2674 	printf("   sectPerSU: %d SUsPerPU: %d SUsPerRU: %d\n",
   2675 	       clabel->sectPerSU, clabel->SUsPerPU, clabel->SUsPerRU);
   2676 	printf("   RAID Level: %c  blocksize: %d numBlocks: %d\n",
   2677 	       (char) clabel->parityConfig, clabel->blockSize,
   2678 	       clabel->numBlocks);
   2679 	printf("   Autoconfig: %s\n", clabel->autoconfigure ? "Yes" : "No" );
   2680 	printf("   Last configured as: raid%d\n", clabel->last_unit );
   2681 #if 0
   2682 	   printf("   Config order: %d\n", clabel->config_order);
   2683 #endif
   2684 
   2685 }
   2686 
   2687 RF_ConfigSet_t *
   2688 rf_create_auto_sets(ac_list)
   2689 	RF_AutoConfig_t *ac_list;
   2690 {
   2691 	RF_AutoConfig_t *ac;
   2692 	RF_ConfigSet_t *config_sets;
   2693 	RF_ConfigSet_t *cset;
   2694 	RF_AutoConfig_t *ac_next;
   2695 
   2696 
   2697 	config_sets = NULL;
   2698 
   2699 	/* Go through the AutoConfig list, and figure out which components
   2700 	   belong to what sets.  */
   2701 	ac = ac_list;
   2702 	while(ac!=NULL) {
   2703 		/* we're going to putz with ac->next, so save it here
   2704 		   for use at the end of the loop */
   2705 		ac_next = ac->next;
   2706 
   2707 		if (config_sets == NULL) {
   2708 			/* will need at least this one... */
   2709 			config_sets = (RF_ConfigSet_t *)
   2710 				malloc(sizeof(RF_ConfigSet_t),
   2711 				       M_RAIDFRAME, M_NOWAIT);
   2712 			if (config_sets == NULL) {
   2713 				panic("rf_create_auto_sets: No memory!\n");
   2714 			}
   2715 			/* this one is easy :) */
   2716 			config_sets->ac = ac;
   2717 			config_sets->next = NULL;
   2718 			config_sets->rootable = 0;
   2719 			ac->next = NULL;
   2720 		} else {
   2721 			/* which set does this component fit into? */
   2722 			cset = config_sets;
   2723 			while(cset!=NULL) {
   2724 				if (rf_does_it_fit(cset, ac)) {
   2725 					/* looks like it matches */
   2726 					ac->next = cset->ac;
   2727 					cset->ac = ac;
   2728 					break;
   2729 				}
   2730 				cset = cset->next;
   2731 			}
   2732 			if (cset==NULL) {
   2733 				/* didn't find a match above... new set..*/
   2734 				cset = (RF_ConfigSet_t *)
   2735 					malloc(sizeof(RF_ConfigSet_t),
   2736 					       M_RAIDFRAME, M_NOWAIT);
   2737 				if (cset == NULL) {
   2738 					panic("rf_create_auto_sets: No memory!\n");
   2739 				}
   2740 				cset->ac = ac;
   2741 				ac->next = NULL;
   2742 				cset->next = config_sets;
   2743 				cset->rootable = 0;
   2744 				config_sets = cset;
   2745 			}
   2746 		}
   2747 		ac = ac_next;
   2748 	}
   2749 
   2750 
   2751 	return(config_sets);
   2752 }
   2753 
   2754 static int
   2755 rf_does_it_fit(cset, ac)
   2756 	RF_ConfigSet_t *cset;
   2757 	RF_AutoConfig_t *ac;
   2758 {
   2759 	RF_ComponentLabel_t *clabel1, *clabel2;
   2760 
   2761 	/* If this one matches the *first* one in the set, that's good
   2762 	   enough, since the other members of the set would have been
   2763 	   through here too... */
   2764 	/* note that we are not checking partitionSize here..
   2765 
   2766 	   Note that we are also not checking the mod_counters here.
   2767 	   If everything else matches execpt the mod_counter, that's
   2768 	   good enough for this test.  We will deal with the mod_counters
   2769 	   a little later in the autoconfiguration process.
   2770 
   2771 	    (clabel1->mod_counter == clabel2->mod_counter) &&
   2772 
   2773 	*/
   2774 
   2775 	clabel1 = cset->ac->clabel;
   2776 	clabel2 = ac->clabel;
   2777 	if ((clabel1->version == clabel2->version) &&
   2778 	    (clabel1->serial_number == clabel2->serial_number) &&
   2779 	    (clabel1->num_rows == clabel2->num_rows) &&
   2780 	    (clabel1->num_columns == clabel2->num_columns) &&
   2781 	    (clabel1->sectPerSU == clabel2->sectPerSU) &&
   2782 	    (clabel1->SUsPerPU == clabel2->SUsPerPU) &&
   2783 	    (clabel1->SUsPerRU == clabel2->SUsPerRU) &&
   2784 	    (clabel1->parityConfig == clabel2->parityConfig) &&
   2785 	    (clabel1->maxOutstanding == clabel2->maxOutstanding) &&
   2786 	    (clabel1->blockSize == clabel2->blockSize) &&
   2787 	    (clabel1->numBlocks == clabel2->numBlocks) &&
   2788 	    (clabel1->autoconfigure == clabel2->autoconfigure) &&
   2789 	    (clabel1->root_partition == clabel2->root_partition) &&
   2790 	    (clabel1->last_unit == clabel2->last_unit) &&
   2791 	    (clabel1->config_order == clabel2->config_order)) {
   2792 		/* if it get's here, it almost *has* to be a match */
   2793 	} else {
   2794 		/* it's not consistent with somebody in the set..
   2795 		   punt */
   2796 		return(0);
   2797 	}
   2798 	/* all was fine.. it must fit... */
   2799 	return(1);
   2800 }
   2801 
   2802 int
   2803 rf_have_enough_components(cset)
   2804 	RF_ConfigSet_t *cset;
   2805 {
   2806 	RF_AutoConfig_t *ac;
   2807 	RF_AutoConfig_t *auto_config;
   2808 	RF_ComponentLabel_t *clabel;
   2809 	int r,c;
   2810 	int num_rows;
   2811 	int num_cols;
   2812 	int num_missing;
   2813 
   2814 	/* check to see that we have enough 'live' components
   2815 	   of this set.  If so, we can configure it if necessary */
   2816 
   2817 	num_rows = cset->ac->clabel->num_rows;
   2818 	num_cols = cset->ac->clabel->num_columns;
   2819 
   2820 	/* XXX Check for duplicate components!?!?!? */
   2821 
   2822 	num_missing = 0;
   2823 	auto_config = cset->ac;
   2824 
   2825 	for(r=0; r<num_rows; r++) {
   2826 		for(c=0; c<num_cols; c++) {
   2827 			ac = auto_config;
   2828 			while(ac!=NULL) {
   2829 				if (ac->clabel==NULL) {
   2830 					/* big-time bad news. */
   2831 					goto fail;
   2832 				}
   2833 				if ((ac->clabel->row == r) &&
   2834 				    (ac->clabel->column == c)) {
   2835 					/* it's this one... */
   2836 #if DEBUG
   2837 					printf("Found: %s at %d,%d\n",
   2838 					       ac->devname,r,c);
   2839 #endif
   2840 					break;
   2841 				}
   2842 				ac=ac->next;
   2843 			}
   2844 			if (ac==NULL) {
   2845 				/* Didn't find one here! */
   2846 				num_missing++;
   2847 			}
   2848 		}
   2849 	}
   2850 
   2851 	clabel = cset->ac->clabel;
   2852 
   2853 	if (((clabel->parityConfig == '0') && (num_missing > 0)) ||
   2854 	    ((clabel->parityConfig == '1') && (num_missing > 1)) ||
   2855 	    ((clabel->parityConfig == '4') && (num_missing > 1)) ||
   2856 	    ((clabel->parityConfig == '5') && (num_missing > 1))) {
   2857 		/* XXX this needs to be made *much* more general */
   2858 		/* Too many failures */
   2859 		return(0);
   2860 	}
   2861 	/* otherwise, all is well, and we've got enough to take a kick
   2862 	   at autoconfiguring this set */
   2863 	return(1);
   2864 fail:
   2865 	return(0);
   2866 
   2867 }
   2868 
   2869 void
   2870 rf_create_configuration(ac,config,raidPtr)
   2871 	RF_AutoConfig_t *ac;
   2872 	RF_Config_t *config;
   2873 	RF_Raid_t *raidPtr;
   2874 {
   2875 	RF_ComponentLabel_t *clabel;
   2876 
   2877 	clabel = ac->clabel;
   2878 
   2879 	/* 1. Fill in the common stuff */
   2880 	config->numRow = clabel->num_rows;
   2881 	config->numCol = clabel->num_columns;
   2882 	config->numSpare = 0; /* XXX should this be set here? */
   2883 	config->sectPerSU = clabel->sectPerSU;
   2884 	config->SUsPerPU = clabel->SUsPerPU;
   2885 	config->SUsPerRU = clabel->SUsPerRU;
   2886 	config->parityConfig = clabel->parityConfig;
   2887 	/* XXX... */
   2888 	strcpy(config->diskQueueType,"fifo");
   2889 	config->maxOutstandingDiskReqs = clabel->maxOutstanding;
   2890 	config->layoutSpecificSize = 0; /* XXX ?? */
   2891 
   2892 	while(ac!=NULL) {
   2893 		/* row/col values will be in range due to the checks
   2894 		   in reasonable_label() */
   2895 		strcpy(config->devnames[ac->clabel->row][ac->clabel->column],
   2896 		       ac->devname);
   2897 		ac = ac->next;
   2898 	}
   2899 
   2900 }
   2901 
   2902 int
   2903 rf_set_autoconfig(raidPtr, new_value)
   2904 	RF_Raid_t *raidPtr;
   2905 	int new_value;
   2906 {
   2907 	RF_ComponentLabel_t clabel;
   2908 	struct vnode *vp;
   2909 	dev_t dev;
   2910 	int row, column;
   2911 
   2912 	raidPtr->autoconfigure = new_value;
   2913 	for(row=0; row<raidPtr->numRow; row++) {
   2914 		for(column=0; column<raidPtr->numCol; column++) {
   2915 			dev = raidPtr->Disks[row][column].dev;
   2916 			vp = raidPtr->raid_cinfo[row][column].ci_vp;
   2917 			raidread_component_label(dev, vp, &clabel);
   2918 			clabel.autoconfigure = new_value;
   2919 			raidwrite_component_label(dev, vp, &clabel);
   2920 		}
   2921 	}
   2922 	return(new_value);
   2923 }
   2924 
   2925 int
   2926 rf_set_rootpartition(raidPtr, new_value)
   2927 	RF_Raid_t *raidPtr;
   2928 	int new_value;
   2929 {
   2930 	RF_ComponentLabel_t clabel;
   2931 	struct vnode *vp;
   2932 	dev_t dev;
   2933 	int row, column;
   2934 
   2935 	raidPtr->root_partition = new_value;
   2936 	for(row=0; row<raidPtr->numRow; row++) {
   2937 		for(column=0; column<raidPtr->numCol; column++) {
   2938 			dev = raidPtr->Disks[row][column].dev;
   2939 			vp = raidPtr->raid_cinfo[row][column].ci_vp;
   2940 			raidread_component_label(dev, vp, &clabel);
   2941 			clabel.root_partition = new_value;
   2942 			raidwrite_component_label(dev, vp, &clabel);
   2943 		}
   2944 	}
   2945 	return(new_value);
   2946 }
   2947 
   2948 void
   2949 rf_release_all_vps(cset)
   2950 	RF_ConfigSet_t *cset;
   2951 {
   2952 	RF_AutoConfig_t *ac;
   2953 
   2954 	ac = cset->ac;
   2955 	while(ac!=NULL) {
   2956 		/* Close the vp, and give it back */
   2957 		if (ac->vp) {
   2958 			VOP_CLOSE(ac->vp, FREAD, NOCRED, 0);
   2959 			vput(ac->vp);
   2960 		}
   2961 		ac = ac->next;
   2962 	}
   2963 }
   2964 
   2965 
   2966 void
   2967 rf_cleanup_config_set(cset)
   2968 	RF_ConfigSet_t *cset;
   2969 {
   2970 	RF_AutoConfig_t *ac;
   2971 	RF_AutoConfig_t *next_ac;
   2972 
   2973 	ac = cset->ac;
   2974 	while(ac!=NULL) {
   2975 		next_ac = ac->next;
   2976 		/* nuke the label */
   2977 		free(ac->clabel, M_RAIDFRAME);
   2978 		/* cleanup the config structure */
   2979 		free(ac, M_RAIDFRAME);
   2980 		/* "next.." */
   2981 		ac = next_ac;
   2982 	}
   2983 	/* and, finally, nuke the config set */
   2984 	free(cset, M_RAIDFRAME);
   2985 }
   2986 
   2987 
   2988 void
   2989 raid_init_component_label(raidPtr, clabel)
   2990 	RF_Raid_t *raidPtr;
   2991 	RF_ComponentLabel_t *clabel;
   2992 {
   2993 	/* current version number */
   2994 	clabel->version = RF_COMPONENT_LABEL_VERSION;
   2995 	clabel->serial_number = raidPtr->serial_number;
   2996 	clabel->mod_counter = raidPtr->mod_counter;
   2997 	clabel->num_rows = raidPtr->numRow;
   2998 	clabel->num_columns = raidPtr->numCol;
   2999 	clabel->clean = RF_RAID_DIRTY; /* not clean */
   3000 	clabel->status = rf_ds_optimal; /* "It's good!" */
   3001 
   3002 	clabel->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit;
   3003 	clabel->SUsPerPU = raidPtr->Layout.SUsPerPU;
   3004 	clabel->SUsPerRU = raidPtr->Layout.SUsPerRU;
   3005 
   3006 	clabel->blockSize = raidPtr->bytesPerSector;
   3007 	clabel->numBlocks = raidPtr->sectorsPerDisk;
   3008 
   3009 	/* XXX not portable */
   3010 	clabel->parityConfig = raidPtr->Layout.map->parityConfig;
   3011 	clabel->maxOutstanding = raidPtr->maxOutstanding;
   3012 	clabel->autoconfigure = raidPtr->autoconfigure;
   3013 	clabel->root_partition = raidPtr->root_partition;
   3014 	clabel->last_unit = raidPtr->raidid;
   3015 	clabel->config_order = raidPtr->config_order;
   3016 }
   3017 
   3018 int
   3019 rf_auto_config_set(cset,unit)
   3020 	RF_ConfigSet_t *cset;
   3021 	int *unit;
   3022 {
   3023 	RF_Raid_t *raidPtr;
   3024 	RF_Config_t *config;
   3025 	int raidID;
   3026 	int retcode;
   3027 
   3028 	printf("Starting autoconfigure on raid%d\n",raidID);
   3029 
   3030 	retcode = 0;
   3031 	*unit = -1;
   3032 
   3033 	/* 1. Create a config structure */
   3034 
   3035 	config = (RF_Config_t *)malloc(sizeof(RF_Config_t),
   3036 				       M_RAIDFRAME,
   3037 				       M_NOWAIT);
   3038 	if (config==NULL) {
   3039 		printf("Out of mem!?!?\n");
   3040 				/* XXX do something more intelligent here. */
   3041 		return(1);
   3042 	}
   3043 	/* XXX raidID needs to be set correctly.. */
   3044 
   3045 	/*
   3046 	   2. Figure out what RAID ID this one is supposed to live at
   3047 	   See if we can get the same RAID dev that it was configured
   3048 	   on last time..
   3049 	*/
   3050 
   3051 	raidID = cset->ac->clabel->last_unit;
   3052 	if ((raidID < 0) || (raidID >= numraid)) {
   3053 		/* let's not wander off into lala land. */
   3054 		raidID = numraid - 1;
   3055 	}
   3056 	if (raidPtrs[raidID]->valid != 0) {
   3057 
   3058 		/*
   3059 		   Nope... Go looking for an alternative...
   3060 		   Start high so we don't immediately use raid0 if that's
   3061 		   not taken.
   3062 		*/
   3063 
   3064 		for(raidID = numraid; raidID >= 0; raidID--) {
   3065 			if (raidPtrs[raidID]->valid == 0) {
   3066 				/* can use this one! */
   3067 				break;
   3068 			}
   3069 		}
   3070 	}
   3071 
   3072 	if (raidID < 0) {
   3073 		/* punt... */
   3074 		printf("Unable to auto configure this set!\n");
   3075 		printf("(Out of RAID devs!)\n");
   3076 		return(1);
   3077 	}
   3078 
   3079 	raidPtr = raidPtrs[raidID];
   3080 
   3081 	/* XXX all this stuff should be done SOMEWHERE ELSE! */
   3082 	raidPtr->raidid = raidID;
   3083 	raidPtr->openings = RAIDOUTSTANDING;
   3084 
   3085 	/* 3. Build the configuration structure */
   3086 	rf_create_configuration(cset->ac, config, raidPtr);
   3087 
   3088 	/* 4. Do the configuration */
   3089 	retcode = rf_Configure(raidPtr, config, cset->ac);
   3090 
   3091 	if (retcode == 0) {
   3092 #if DEBUG
   3093 		printf("Calling raidinit()\n");
   3094 #endif
   3095 				/* XXX the 0 below is bogus! */
   3096 		raidinit(raidPtrs[raidID]);
   3097 
   3098 		rf_markalldirty(raidPtrs[raidID]);
   3099 		raidPtrs[raidID]->autoconfigure = 1; /* XXX do this here? */
   3100 		if (cset->ac->clabel->root_partition==1) {
   3101 			/* everything configured just fine.  Make a note
   3102 			   that this set is eligible to be root. */
   3103 			cset->rootable = 1;
   3104 			/* XXX do this here? */
   3105 			raidPtrs[raidID]->root_partition = 1;
   3106 		}
   3107 	}
   3108 
   3109 	/* 5. Cleanup */
   3110 	free(config, M_RAIDFRAME);
   3111 
   3112 	*unit = raidID;
   3113 	return(retcode);
   3114 }
   3115