Home | History | Annotate | Line # | Download | only in raidframe
rf_netbsdkintf.c revision 1.59
      1 /*	$NetBSD: rf_netbsdkintf.c,v 1.59 2000/02/25 02:42:30 oster Exp $	*/
      2 /*-
      3  * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc.
      4  * All rights reserved.
      5  *
      6  * This code is derived from software contributed to The NetBSD Foundation
      7  * by Greg Oster; Jason R. Thorpe.
      8  *
      9  * Redistribution and use in source and binary forms, with or without
     10  * modification, are permitted provided that the following conditions
     11  * are met:
     12  * 1. Redistributions of source code must retain the above copyright
     13  *    notice, this list of conditions and the following disclaimer.
     14  * 2. Redistributions in binary form must reproduce the above copyright
     15  *    notice, this list of conditions and the following disclaimer in the
     16  *    documentation and/or other materials provided with the distribution.
     17  * 3. All advertising materials mentioning features or use of this software
     18  *    must display the following acknowledgement:
     19  *        This product includes software developed by the NetBSD
     20  *        Foundation, Inc. and its contributors.
     21  * 4. Neither the name of The NetBSD Foundation nor the names of its
     22  *    contributors may be used to endorse or promote products derived
     23  *    from this software without specific prior written permission.
     24  *
     25  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     26  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     27  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     28  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     29  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     30  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     31  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     32  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     33  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     34  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     35  * POSSIBILITY OF SUCH DAMAGE.
     36  */
     37 
     38 /*
     39  * Copyright (c) 1988 University of Utah.
     40  * Copyright (c) 1990, 1993
     41  *      The Regents of the University of California.  All rights reserved.
     42  *
     43  * This code is derived from software contributed to Berkeley by
     44  * the Systems Programming Group of the University of Utah Computer
     45  * Science Department.
     46  *
     47  * Redistribution and use in source and binary forms, with or without
     48  * modification, are permitted provided that the following conditions
     49  * are met:
     50  * 1. Redistributions of source code must retain the above copyright
     51  *    notice, this list of conditions and the following disclaimer.
     52  * 2. Redistributions in binary form must reproduce the above copyright
     53  *    notice, this list of conditions and the following disclaimer in the
     54  *    documentation and/or other materials provided with the distribution.
     55  * 3. All advertising materials mentioning features or use of this software
     56  *    must display the following acknowledgement:
     57  *      This product includes software developed by the University of
     58  *      California, Berkeley and its contributors.
     59  * 4. Neither the name of the University nor the names of its contributors
     60  *    may be used to endorse or promote products derived from this software
     61  *    without specific prior written permission.
     62  *
     63  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     64  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     65  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     66  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     67  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     68  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     69  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     70  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     71  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     72  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     73  * SUCH DAMAGE.
     74  *
     75  * from: Utah $Hdr: cd.c 1.6 90/11/28$
     76  *
     77  *      @(#)cd.c        8.2 (Berkeley) 11/16/93
     78  */
     79 
     80 
     81 
     82 
     83 /*
     84  * Copyright (c) 1995 Carnegie-Mellon University.
     85  * All rights reserved.
     86  *
     87  * Authors: Mark Holland, Jim Zelenka
     88  *
     89  * Permission to use, copy, modify and distribute this software and
     90  * its documentation is hereby granted, provided that both the copyright
     91  * notice and this permission notice appear in all copies of the
     92  * software, derivative works or modified versions, and any portions
     93  * thereof, and that both notices appear in supporting documentation.
     94  *
     95  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
     96  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
     97  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
     98  *
     99  * Carnegie Mellon requests users of this software to return to
    100  *
    101  *  Software Distribution Coordinator  or  Software.Distribution (at) CS.CMU.EDU
    102  *  School of Computer Science
    103  *  Carnegie Mellon University
    104  *  Pittsburgh PA 15213-3890
    105  *
    106  * any improvements or extensions that they make and grant Carnegie the
    107  * rights to redistribute these changes.
    108  */
    109 
    110 /***********************************************************
    111  *
    112  * rf_kintf.c -- the kernel interface routines for RAIDframe
    113  *
    114  ***********************************************************/
    115 
    116 #include <sys/errno.h>
    117 #include <sys/param.h>
    118 #include <sys/pool.h>
    119 #include <sys/queue.h>
    120 #include <sys/disk.h>
    121 #include <sys/device.h>
    122 #include <sys/stat.h>
    123 #include <sys/ioctl.h>
    124 #include <sys/fcntl.h>
    125 #include <sys/systm.h>
    126 #include <sys/namei.h>
    127 #include <sys/vnode.h>
    128 #include <sys/param.h>
    129 #include <sys/types.h>
    130 #include <machine/types.h>
    131 #include <sys/disklabel.h>
    132 #include <sys/conf.h>
    133 #include <sys/lock.h>
    134 #include <sys/buf.h>
    135 #include <sys/user.h>
    136 
    137 #include "raid.h"
    138 #include "rf_raid.h"
    139 #include "rf_raidframe.h"
    140 #include "rf_copyback.h"
    141 #include "rf_dag.h"
    142 #include "rf_dagflags.h"
    143 #include "rf_diskqueue.h"
    144 #include "rf_acctrace.h"
    145 #include "rf_etimer.h"
    146 #include "rf_general.h"
    147 #include "rf_debugMem.h"
    148 #include "rf_kintf.h"
    149 #include "rf_options.h"
    150 #include "rf_driver.h"
    151 #include "rf_parityscan.h"
    152 #include "rf_debugprint.h"
    153 #include "rf_threadstuff.h"
    154 
/* Debug verbosity for this file: db1_printf() output appears only when > 0. */
int     rf_kdebug_level = 0;

/*
 * db1_printf((fmt, args...)) -- level-1 debug printf.
 *
 * The argument is a complete, parenthesized printf argument list (note the
 * double parentheses at the call site).  Both variants expand to a single
 * do { } while (0) statement so that the macro can be used as the body of
 * an un-braced if/else without capturing the else or leaving a stray ';'.
 * Without DEBUG the argument is discarded and never evaluated.
 */
#ifdef DEBUG
#define db1_printf(a) do { if (rf_kdebug_level > 0) printf a; } while (0)
#else				/* DEBUG */
#define db1_printf(a) do { } while (0)
#endif				/* DEBUG */
    162 
    163 static RF_Raid_t **raidPtrs;	/* global raid device descriptors */
    164 
    165 RF_DECLARE_STATIC_MUTEX(rf_sparet_wait_mutex)
    166 
    167 static RF_SparetWait_t *rf_sparet_wait_queue;	/* requests to install a
    168 						 * spare table */
    169 static RF_SparetWait_t *rf_sparet_resp_queue;	/* responses from
    170 						 * installation process */
    171 
    172 /* prototypes */
    173 static void KernelWakeupFunc(struct buf * bp);
    174 static void InitBP(struct buf * bp, struct vnode *, unsigned rw_flag,
    175 		   dev_t dev, RF_SectorNum_t startSect,
    176 		   RF_SectorCount_t numSect, caddr_t buf,
    177 		   void (*cbFunc) (struct buf *), void *cbArg,
    178 		   int logBytesPerSector, struct proc * b_proc);
    179 static void raidinit __P((RF_Raid_t *));
    180 
    181 void raidattach __P((int));
    182 int raidsize __P((dev_t));
    183 int raidopen __P((dev_t, int, int, struct proc *));
    184 int raidclose __P((dev_t, int, int, struct proc *));
    185 int raidioctl __P((dev_t, u_long, caddr_t, int, struct proc *));
    186 int raidwrite __P((dev_t, struct uio *, int));
    187 int raidread __P((dev_t, struct uio *, int));
    188 void raidstrategy __P((struct buf *));
    189 int raiddump __P((dev_t, daddr_t, caddr_t, size_t));
    190 
    191 /*
    192  * Pilfered from ccd.c
    193  */
    194 
    195 struct raidbuf {	/* a fresh struct buf plus the bookkeeping needed to relate it to the original I/O */
    196 	struct buf rf_buf;	/* new I/O buf.  MUST BE FIRST!!! (presumably so a struct buf * can be cast back to its raidbuf -- confirm at the cast sites) */
    197 	struct buf *rf_obp;	/* ptr. to original I/O buf */
    198 	int     rf_flags;	/* misc. flags */
    199 	RF_DiskQueueData_t *req;/* the request that this was part of.. */
    200 };
    201 
    202 
    203 #define RAIDGETBUF(rs) pool_get(&(rs)->sc_cbufpool, PR_NOWAIT)
    204 #define	RAIDPUTBUF(rs, cbp) pool_put(&(rs)->sc_cbufpool, cbp)
    205 
    206 /* XXX Not sure if the following should be replacing the raidPtrs above,
    207    or if it should be used in conjunction with that...
    208 */
    209 
    210 struct raid_softc {	/* per-unit driver state; raid_softc[] is indexed by raidunit(dev) */
    211 	int     sc_flags;	/* RAIDF_* state flags, defined below */
    212 	int     sc_cflags;	/* configuration flags */
    213 	size_t  sc_size;        /* size of the raid device */
    214 	char    sc_xname[20];	/* XXX external name */
    215 	struct disk sc_dkdev;	/* generic disk device info (label and open masks live here) */
    216 	struct pool sc_cbufpool;	/* component buffer pool, used by RAIDGETBUF/RAIDPUTBUF */
    217 	struct buf_queue buf_queue;	/* used for the device queue; raidstrategy() appends bufs here */
    218 };
    219 /* sc_flags */
    220 #define RAIDF_INITED	0x01	/* unit has been initialized */
    221 #define RAIDF_WLABEL	0x02	/* label area is writable */
    222 #define RAIDF_LABELLING	0x04	/* unit is currently being labelled */
    223 #define RAIDF_WANTED	0x40	/* someone is waiting to obtain a lock */
    224 #define RAIDF_LOCKED	0x80	/* unit is locked */
    225 
    226 #define	raidunit(x)	DISKUNIT(x)
    227 int numraid = 0;
    228 
    229 /*
    230  * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
    231  * Be aware that large numbers can allow the driver to consume a lot of
    232  * kernel memory, especially on writes, and in degraded mode reads.
    233  *
    234  * For example: with a stripe width of 64 blocks (32k) and 5 disks,
    235  * a single 64K write will typically require 64K for the old data,
    236  * 64K for the old parity, and 64K for the new parity, for a total
    237  * of 192K (if the parity buffer is not re-used immediately).
    238  * Even if it is used immediately, that's still 128K, which when multiplied
    239  * by say 10 requests, is 1280K, *on top* of the 640K of incoming data.
    240  *
    241  * Now in degraded mode, for example, a 64K read on the above setup may
    242  * require data reconstruction, which will require *all* of the 4 remaining
    243  * disks to participate -- 4 * 32K/disk == 128K again.
    244  */
    245 
    246 #ifndef RAIDOUTSTANDING
    247 #define RAIDOUTSTANDING   6
    248 #endif
    249 
    250 #define RAIDLABELDEV(dev)	\
    251 	(MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))
    252 
    253 /* declared here, and made public, for the benefit of KVM stuff.. */
    254 struct raid_softc *raid_softc;
    255 
    256 static void raidgetdefaultlabel __P((RF_Raid_t *, struct raid_softc *,
    257 				     struct disklabel *));
    258 static void raidgetdisklabel __P((dev_t));
    259 static void raidmakedisklabel __P((struct raid_softc *));
    260 
    261 static int raidlock __P((struct raid_softc *));
    262 static void raidunlock __P((struct raid_softc *));
    263 
    264 static void rf_markalldirty __P((RF_Raid_t *));
    265 void rf_mountroot_hook __P((struct device *));
    266 
    267 struct device *raidrootdev;
    268 struct cfdata cf_raidrootdev;
    269 struct cfdriver cfdrv;
    270 /* XXX these should be moved up */
    271 #include "rf_configure.h"
    272 #include <sys/reboot.h>
    273 
    274 void rf_ReconThread __P((struct rf_recon_req *));
    275 /* XXX what I want is: */
    276 /*void rf_ReconThread __P((RF_Raid_t *raidPtr));  */
    277 void rf_RewriteParityThread __P((RF_Raid_t *raidPtr));
    278 void rf_CopybackThread __P((RF_Raid_t *raidPtr));
    279 void rf_ReconstructInPlaceThread __P((struct rf_recon_req *));
    280 void rf_buildroothack __P((void *));
    281 
    282 RF_AutoConfig_t *rf_find_raid_components __P((void));
    283 void print_component_label __P((RF_ComponentLabel_t *));
    284 RF_ConfigSet_t *rf_create_auto_sets __P((RF_AutoConfig_t *));
    285 static int rf_does_it_fit __P((RF_ConfigSet_t *,RF_AutoConfig_t *));
    286 static int rf_reasonable_label __P((RF_ComponentLabel_t *));
    287 void rf_create_configuration __P((RF_AutoConfig_t *,RF_Config_t *,
    288 				  RF_Raid_t *));
    289 int rf_set_autoconfig __P((RF_Raid_t *, int));
    290 int rf_set_rootpartition __P((RF_Raid_t *, int));
    291 void rf_release_all_vps __P((RF_ConfigSet_t *));
    292 void rf_cleanup_config_set __P((RF_ConfigSet_t *));
    293 int rf_have_enough_components __P((RF_ConfigSet_t *));
    294 int rf_auto_config_set __P((RF_ConfigSet_t *, int *));
    295 
    296 static int raidautoconfig = 0; /* Debugging, mostly.  Set to 0 to not
    297 				  allow autoconfig to take place */
    298 /* XXX ugly hack. */
    299 const char *raid_rooty = "raid0";
    300 extern struct device *booted_device;
    301 
    302 void
    303 raidattach(num)
    304 	int     num;
    305 {
    306 	int raidID;
    307 	int i, rc;
    308 	RF_AutoConfig_t *ac_list; /* autoconfig list */
    309 	RF_ConfigSet_t *config_sets;
    310 
    311 #ifdef DEBUG
    312 	printf("raidattach: Asked for %d units\n", num);
    313 #endif
    314 
    315 	if (num <= 0) {
    316 #ifdef DIAGNOSTIC
    317 		panic("raidattach: count <= 0");
    318 #endif
    319 		return;
    320 	}
    321 	/* This is where all the initialization stuff gets done. */
    322 
    323 	numraid = num;
    324 
    325 	/* Make some space for requested number of units... */
    326 
    327 	RF_Calloc(raidPtrs, num, sizeof(RF_Raid_t *), (RF_Raid_t **));
    328 	if (raidPtrs == NULL) {
    329 		panic("raidPtrs is NULL!!\n");
    330 	}
    331 
    332 	rc = rf_mutex_init(&rf_sparet_wait_mutex);
    333 	if (rc) {
    334 		RF_PANIC();
    335 	}
    336 
    337 	rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
    338 
    339 	for (i = 0; i < num; i++)
    340 		raidPtrs[i] = NULL;
    341 	rc = rf_BootRaidframe();
    342 	if (rc == 0)
    343 		printf("Kernelized RAIDframe activated\n");
    344 	else
    345 		panic("Serious error booting RAID!!\n");
    346 
    347 	/* put together some datastructures like the CCD device does.. This
    348 	 * lets us lock the device and what-not when it gets opened. */
    349 
    350 	raid_softc = (struct raid_softc *)
    351 		malloc(num * sizeof(struct raid_softc),
    352 		       M_RAIDFRAME, M_NOWAIT);
    353 	if (raid_softc == NULL) {
    354 		printf("WARNING: no memory for RAIDframe driver\n");
    355 		return;
    356 	}
    357 
    358 	bzero(raid_softc, num * sizeof(struct raid_softc));
    359 
    360 	raidrootdev = (struct device *)malloc(num * sizeof(struct device),
    361 					      M_RAIDFRAME, M_NOWAIT);
    362 	if (raidrootdev == NULL) {
    363 		panic("No memory for RAIDframe driver!!?!?!\n");
    364 	}
    365 
    366 	for (raidID = 0; raidID < num; raidID++) {
    367 		BUFQ_INIT(&raid_softc[raidID].buf_queue);
    368 
    369 		raidrootdev[raidID].dv_class  = DV_DISK;
    370 		raidrootdev[raidID].dv_cfdata = NULL;
    371 		raidrootdev[raidID].dv_unit   = raidID;
    372 		raidrootdev[raidID].dv_parent = NULL;
    373 		raidrootdev[raidID].dv_flags  = 0;
    374 		sprintf(raidrootdev[raidID].dv_xname,"raid%d",raidID);
    375 
    376 		RF_Calloc(raidPtrs[raidID], 1, sizeof(RF_Raid_t),
    377 			  (RF_Raid_t *));
    378 		if (raidPtrs[raidID] == NULL) {
    379 			printf("WARNING: raidPtrs[%d] is NULL\n", raidID);
    380 			numraid = raidID;
    381 			return;
    382 		}
    383 	}
    384 
    385 if (raidautoconfig) {
    386 	/* 1. locate all RAID components on the system */
    387 
    388 #if DEBUG
    389 	printf("Searching for raid components...\n");
    390 #endif
    391 	ac_list = rf_find_raid_components();
    392 
    393 	/* 2. sort them into their respective sets */
    394 
    395 	config_sets = rf_create_auto_sets(ac_list);
    396 
    397 	/* 3. evaluate each set and configure the valid ones
    398 	   This gets done in rf_buildroothack() */
    399 
    400 	/* schedule the creation of the thread to do the
    401 	   "/ on RAID" stuff */
    402 
    403 	kthread_create(rf_buildroothack,config_sets);
    404 
    405 	/* 4. make sure we get our mud.. I mean root.. hooks in.. */
    406 	/* XXXX pick raid0 for now... and this should be only done
    407 	   if we find something that's bootable!!! */
    408 #if 0
    409 	mountroothook_establish(rf_mountroot_hook, &raidrootdev[0]);
    410 #endif
    411 	if (boothowto & RB_ASKNAME) {
    412 		/* We don't auto-config... */
    413 	} else {
    414 		/* They didn't ask, and we found something bootable... */
    415 		/* XXX pretend for now.. */
    416 #if 0
    417  		booted_device = &raidrootdev[0];
    418 #endif
    419 	}
    420 }
    421 
    422 }
    423 
    424 void
    425 rf_buildroothack(arg)
    426 	void *arg;
    427 {
    428 	RF_ConfigSet_t *config_sets = arg;
    429 	RF_ConfigSet_t *cset;
    430 	RF_ConfigSet_t *next_cset;
    431 	int retcode;
    432 	int raidID;
    433 	int rootID;
    434 	int num_root;
    435 
    436 	num_root = 0;
    437 	cset = config_sets;
    438 	while(cset != NULL ) {
    439 		next_cset = cset->next;
    440 		if (rf_have_enough_components(cset) &&
    441 		    cset->ac->clabel->autoconfigure==1) {
    442 			retcode = rf_auto_config_set(cset,&raidID);
    443 			if (!retcode) {
    444 				if (cset->rootable) {
    445 					rootID = raidID;
    446 					num_root++;
    447 				}
    448 			} else {
    449 				/* The autoconfig didn't work :( */
    450 #if DEBUG
    451 				printf("Autoconfig failed with code %d for raid%d\n", retcode, raidID);
    452 #endif
    453 				rf_release_all_vps(cset);
    454 #if DEBUG
    455 				printf("Done cleanup\n");
    456 #endif
    457 			}
    458 		} else {
    459 			/* we're not autoconfiguring this set...
    460 			   release the associated resources */
    461 #if DEBUG
    462 			printf("Releasing vp's\n");
    463 #endif
    464 			rf_release_all_vps(cset);
    465 #if DEBUG
    466 			printf("Done.\n");
    467 #endif
    468 		}
    469 		/* cleanup */
    470 #if DEBUG
    471 		printf("Cleaning up config set\n");
    472 #endif
    473 		rf_cleanup_config_set(cset);
    474 #if DEBUG
    475 		printf("Done cleanup\n");
    476 #endif
    477 		cset = next_cset;
    478 	}
    479 	if (boothowto & RB_ASKNAME) {
    480 		/* We don't auto-config... */
    481 	} else {
    482 		/* They didn't ask, and we found something bootable... */
    483 		/* XXX pretend for now.. */
    484 		if (num_root == 1) {
    485 #if 1
    486 			booted_device = &raidrootdev[rootID];
    487 #endif
    488 		} else if (num_root > 1) {
    489 			/* we can't guess.. require the user to answer... */
    490 			boothowto |= RB_ASKNAME;
    491 		}
    492 	}
    493 }
    494 
    495 
    496 int
    497 raidsize(dev)
    498 	dev_t   dev;
    499 {
    500 	struct raid_softc *rs;
    501 	struct disklabel *lp;
    502 	int     part, unit, omask, size;
    503 
    504 	unit = raidunit(dev);
    505 	if (unit >= numraid)
    506 		return (-1);
    507 	rs = &raid_softc[unit];
    508 
    509 	if ((rs->sc_flags & RAIDF_INITED) == 0)
    510 		return (-1);
    511 
    512 	part = DISKPART(dev);
    513 	omask = rs->sc_dkdev.dk_openmask & (1 << part);
    514 	lp = rs->sc_dkdev.dk_label;
    515 
    516 	if (omask == 0 && raidopen(dev, 0, S_IFBLK, curproc))
    517 		return (-1);
    518 
    519 	if (lp->d_partitions[part].p_fstype != FS_SWAP)
    520 		size = -1;
    521 	else
    522 		size = lp->d_partitions[part].p_size *
    523 		    (lp->d_secsize / DEV_BSIZE);
    524 
    525 	if (omask == 0 && raidclose(dev, 0, S_IFBLK, curproc))
    526 		return (-1);
    527 
    528 	return (size);
    529 
    530 }
    531 
    532 int
    533 raiddump(dev, blkno, va, size)
    534 	dev_t   dev;
    535 	daddr_t blkno;
    536 	caddr_t va;
    537 	size_t  size;
    538 {
    539 	/* Not implemented. */
    540 	return ENXIO;
    541 }
    542 /* ARGSUSED */
    543 int
    544 raidopen(dev, flags, fmt, p)
    545 	dev_t   dev;
    546 	int     flags, fmt;
    547 	struct proc *p;
    548 {
    549 	int     unit = raidunit(dev);
    550 	struct raid_softc *rs;
    551 	struct disklabel *lp;
    552 	int     part, pmask;
    553 	int     error = 0;
    554 
    555 	if (unit >= numraid)
    556 		return (ENXIO);
    557 	rs = &raid_softc[unit];
    558 
    559 	if ((error = raidlock(rs)) != 0)
    560 		return (error);
    561 	lp = rs->sc_dkdev.dk_label;
    562 
    563 	part = DISKPART(dev);
    564 	pmask = (1 << part);
    565 
    566 	db1_printf(("Opening raid device number: %d partition: %d\n",
    567 		unit, part));
    568 
    569 
    570 	if ((rs->sc_flags & RAIDF_INITED) &&
    571 	    (rs->sc_dkdev.dk_openmask == 0))
    572 		raidgetdisklabel(dev);
    573 
    574 	/* make sure that this partition exists */
    575 
    576 	if (part != RAW_PART) {
    577 		db1_printf(("Not a raw partition..\n"));
    578 		if (((rs->sc_flags & RAIDF_INITED) == 0) ||
    579 		    ((part >= lp->d_npartitions) ||
    580 			(lp->d_partitions[part].p_fstype == FS_UNUSED))) {
    581 			error = ENXIO;
    582 			raidunlock(rs);
    583 			db1_printf(("Bailing out...\n"));
    584 			return (error);
    585 		}
    586 	}
    587 	/* Prevent this unit from being unconfigured while open. */
    588 	switch (fmt) {
    589 	case S_IFCHR:
    590 		rs->sc_dkdev.dk_copenmask |= pmask;
    591 		break;
    592 
    593 	case S_IFBLK:
    594 		rs->sc_dkdev.dk_bopenmask |= pmask;
    595 		break;
    596 	}
    597 
    598 	if ((rs->sc_dkdev.dk_openmask == 0) &&
    599 	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
    600 		/* First one... mark things as dirty... Note that we *MUST*
    601 		 have done a configure before this.  I DO NOT WANT TO BE
    602 		 SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
    603 		 THAT THEY BELONG TOGETHER!!!!! */
    604 		/* XXX should check to see if we're only open for reading
    605 		   here... If so, we needn't do this, but then need some
    606 		   other way of keeping track of what's happened.. */
    607 
    608 		rf_markalldirty( raidPtrs[unit] );
    609 	}
    610 
    611 
    612 	rs->sc_dkdev.dk_openmask =
    613 	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
    614 
    615 	raidunlock(rs);
    616 
    617 	return (error);
    618 
    619 
    620 }
    621 /* ARGSUSED */
    622 int
    623 raidclose(dev, flags, fmt, p)
    624 	dev_t   dev;
    625 	int     flags, fmt;
    626 	struct proc *p;
    627 {
    628 	int     unit = raidunit(dev);
    629 	struct raid_softc *rs;
    630 	int     error = 0;
    631 	int     part;
    632 
    633 	if (unit >= numraid)
    634 		return (ENXIO);
    635 	rs = &raid_softc[unit];
    636 
    637 	if ((error = raidlock(rs)) != 0)
    638 		return (error);
    639 
    640 	part = DISKPART(dev);
    641 
    642 	/* ...that much closer to allowing unconfiguration... */
    643 	switch (fmt) {
    644 	case S_IFCHR:
    645 		rs->sc_dkdev.dk_copenmask &= ~(1 << part);
    646 		break;
    647 
    648 	case S_IFBLK:
    649 		rs->sc_dkdev.dk_bopenmask &= ~(1 << part);
    650 		break;
    651 	}
    652 	rs->sc_dkdev.dk_openmask =
    653 	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
    654 
    655 	if ((rs->sc_dkdev.dk_openmask == 0) &&
    656 	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
    657 		/* Last one... device is not unconfigured yet.
    658 		   Device shutdown has taken care of setting the
    659 		   clean bits if RAIDF_INITED is not set
    660 		   mark things as clean... */
    661 #ifdef DEBUG
    662 		printf("Last one on raid%d.  Updating status.\n",unit);
    663 #endif
    664 		rf_update_component_labels( raidPtrs[unit] );
    665 	}
    666 
    667 	raidunlock(rs);
    668 	return (0);
    669 
    670 }
    671 
    672 void
    673 raidstrategy(bp)
    674 	register struct buf *bp;
    675 {
    676 	register int s;
    677 
    678 	unsigned int raidID = raidunit(bp->b_dev);
    679 	RF_Raid_t *raidPtr;
    680 	struct raid_softc *rs = &raid_softc[raidID];
    681 	struct disklabel *lp;
    682 	int     wlabel;
    683 
    684 	if ((rs->sc_flags & RAIDF_INITED) ==0) {
    685 		bp->b_error = ENXIO;
    686 		bp->b_flags = B_ERROR;
    687 		bp->b_resid = bp->b_bcount;
    688 		biodone(bp);
    689 		return;
    690 	}
    691 	if (raidID >= numraid || !raidPtrs[raidID]) {
    692 		bp->b_error = ENODEV;
    693 		bp->b_flags |= B_ERROR;
    694 		bp->b_resid = bp->b_bcount;
    695 		biodone(bp);
    696 		return;
    697 	}
    698 	raidPtr = raidPtrs[raidID];
    699 	if (!raidPtr->valid) {
    700 		bp->b_error = ENODEV;
    701 		bp->b_flags |= B_ERROR;
    702 		bp->b_resid = bp->b_bcount;
    703 		biodone(bp);
    704 		return;
    705 	}
    706 	if (bp->b_bcount == 0) {
    707 		db1_printf(("b_bcount is zero..\n"));
    708 		biodone(bp);
    709 		return;
    710 	}
    711 	lp = rs->sc_dkdev.dk_label;
    712 
    713 	/*
    714 	 * Do bounds checking and adjust transfer.  If there's an
    715 	 * error, the bounds check will flag that for us.
    716 	 */
    717 
    718 	wlabel = rs->sc_flags & (RAIDF_WLABEL | RAIDF_LABELLING);
    719 	if (DISKPART(bp->b_dev) != RAW_PART)
    720 		if (bounds_check_with_label(bp, lp, wlabel) <= 0) {
    721 			db1_printf(("Bounds check failed!!:%d %d\n",
    722 				(int) bp->b_blkno, (int) wlabel));
    723 			biodone(bp);
    724 			return;
    725 		}
    726 	s = splbio();
    727 
    728 	bp->b_resid = 0;
    729 
    730 	/* stuff it onto our queue */
    731 	BUFQ_INSERT_TAIL(&rs->buf_queue, bp);
    732 
    733 	raidstart(raidPtrs[raidID]);
    734 
    735 	splx(s);
    736 }
    737 /* ARGSUSED */
    738 int
    739 raidread(dev, uio, flags)
    740 	dev_t   dev;
    741 	struct uio *uio;
    742 	int     flags;
    743 {
    744 	int     unit = raidunit(dev);
    745 	struct raid_softc *rs;
    746 	int     part;
    747 
    748 	if (unit >= numraid)
    749 		return (ENXIO);
    750 	rs = &raid_softc[unit];
    751 
    752 	if ((rs->sc_flags & RAIDF_INITED) == 0)
    753 		return (ENXIO);
    754 	part = DISKPART(dev);
    755 
    756 	db1_printf(("raidread: unit: %d partition: %d\n", unit, part));
    757 
    758 	return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));
    759 
    760 }
    761 /* ARGSUSED */
    762 int
    763 raidwrite(dev, uio, flags)
    764 	dev_t   dev;
    765 	struct uio *uio;
    766 	int     flags;
    767 {
    768 	int     unit = raidunit(dev);
    769 	struct raid_softc *rs;
    770 
    771 	if (unit >= numraid)
    772 		return (ENXIO);
    773 	rs = &raid_softc[unit];
    774 
    775 	if ((rs->sc_flags & RAIDF_INITED) == 0)
    776 		return (ENXIO);
    777 	db1_printf(("raidwrite\n"));
    778 	return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));
    779 
    780 }
    781 
    782 int
    783 raidioctl(dev, cmd, data, flag, p)
    784 	dev_t   dev;
    785 	u_long  cmd;
    786 	caddr_t data;
    787 	int     flag;
    788 	struct proc *p;
    789 {
    790 	int     unit = raidunit(dev);
    791 	int     error = 0;
    792 	int     part, pmask;
    793 	struct raid_softc *rs;
    794 	RF_Config_t *k_cfg, *u_cfg;
    795 	RF_Raid_t *raidPtr;
    796 	RF_RaidDisk_t *diskPtr;
    797 	RF_AccTotals_t *totals;
    798 	RF_DeviceConfig_t *d_cfg, **ucfgp;
    799 	u_char *specific_buf;
    800 	int retcode = 0;
    801 	int row;
    802 	int column;
    803 	struct rf_recon_req *rrcopy, *rr;
    804 	RF_ComponentLabel_t *clabel;
    805 	RF_ComponentLabel_t ci_label;
    806 	RF_ComponentLabel_t **clabel_ptr;
    807 	RF_SingleComponent_t *sparePtr,*componentPtr;
    808 	RF_SingleComponent_t hot_spare;
    809 	RF_SingleComponent_t component;
    810 	int i, j, d;
    811 
    812 	if (unit >= numraid)
    813 		return (ENXIO);
    814 	rs = &raid_softc[unit];
    815 	raidPtr = raidPtrs[unit];
    816 
    817 	db1_printf(("raidioctl: %d %d %d %d\n", (int) dev,
    818 		(int) DISKPART(dev), (int) unit, (int) cmd));
    819 
    820 	/* Must be open for writes for these commands... */
    821 	switch (cmd) {
    822 	case DIOCSDINFO:
    823 	case DIOCWDINFO:
    824 	case DIOCWLABEL:
    825 		if ((flag & FWRITE) == 0)
    826 			return (EBADF);
    827 	}
    828 
    829 	/* Must be initialized for these... */
    830 	switch (cmd) {
    831 	case DIOCGDINFO:
    832 	case DIOCSDINFO:
    833 	case DIOCWDINFO:
    834 	case DIOCGPART:
    835 	case DIOCWLABEL:
    836 	case DIOCGDEFLABEL:
    837 	case RAIDFRAME_SHUTDOWN:
    838 	case RAIDFRAME_REWRITEPARITY:
    839 	case RAIDFRAME_GET_INFO:
    840 	case RAIDFRAME_RESET_ACCTOTALS:
    841 	case RAIDFRAME_GET_ACCTOTALS:
    842 	case RAIDFRAME_KEEP_ACCTOTALS:
    843 	case RAIDFRAME_GET_SIZE:
    844 	case RAIDFRAME_FAIL_DISK:
    845 	case RAIDFRAME_COPYBACK:
    846 	case RAIDFRAME_CHECK_RECON_STATUS:
    847 	case RAIDFRAME_GET_COMPONENT_LABEL:
    848 	case RAIDFRAME_SET_COMPONENT_LABEL:
    849 	case RAIDFRAME_ADD_HOT_SPARE:
    850 	case RAIDFRAME_REMOVE_HOT_SPARE:
    851 	case RAIDFRAME_INIT_LABELS:
    852 	case RAIDFRAME_REBUILD_IN_PLACE:
    853 	case RAIDFRAME_CHECK_PARITY:
    854 	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
    855 	case RAIDFRAME_CHECK_COPYBACK_STATUS:
    856 	case RAIDFRAME_SET_AUTOCONFIG:
    857 	case RAIDFRAME_SET_ROOT:
    858 		if ((rs->sc_flags & RAIDF_INITED) == 0)
    859 			return (ENXIO);
    860 	}
    861 
    862 	switch (cmd) {
    863 
    864 		/* configure the system */
    865 	case RAIDFRAME_CONFIGURE:
    866 
    867 		if (raidPtr->valid) {
    868 			/* There is a valid RAID set running on this unit! */
    869 			printf("raid%d: Device already configured!\n",unit);
    870 		}
    871 
    872 		/* copy-in the configuration information */
    873 		/* data points to a pointer to the configuration structure */
    874 
    875 		u_cfg = *((RF_Config_t **) data);
    876 		RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *));
    877 		if (k_cfg == NULL) {
    878 			return (ENOMEM);
    879 		}
    880 		retcode = copyin((caddr_t) u_cfg, (caddr_t) k_cfg,
    881 		    sizeof(RF_Config_t));
    882 		if (retcode) {
    883 			RF_Free(k_cfg, sizeof(RF_Config_t));
    884 			db1_printf(("rf_ioctl: retcode=%d copyin.1\n",
    885 				retcode));
    886 			return (retcode);
    887 		}
    888 		/* allocate a buffer for the layout-specific data, and copy it
    889 		 * in */
    890 		if (k_cfg->layoutSpecificSize) {
    891 			if (k_cfg->layoutSpecificSize > 10000) {
    892 				/* sanity check */
    893 				RF_Free(k_cfg, sizeof(RF_Config_t));
    894 				return (EINVAL);
    895 			}
    896 			RF_Malloc(specific_buf, k_cfg->layoutSpecificSize,
    897 			    (u_char *));
    898 			if (specific_buf == NULL) {
    899 				RF_Free(k_cfg, sizeof(RF_Config_t));
    900 				return (ENOMEM);
    901 			}
    902 			retcode = copyin(k_cfg->layoutSpecific,
    903 			    (caddr_t) specific_buf,
    904 			    k_cfg->layoutSpecificSize);
    905 			if (retcode) {
    906 				RF_Free(k_cfg, sizeof(RF_Config_t));
    907 				RF_Free(specific_buf,
    908 					k_cfg->layoutSpecificSize);
    909 				db1_printf(("rf_ioctl: retcode=%d copyin.2\n",
    910 					retcode));
    911 				return (retcode);
    912 			}
    913 		} else
    914 			specific_buf = NULL;
    915 		k_cfg->layoutSpecific = specific_buf;
    916 
    917 		/* should do some kind of sanity check on the configuration.
    918 		 * Store the sum of all the bytes in the last byte? */
    919 
    920 		/* configure the system */
    921 
    922 		/*
    923 		 * Clear the entire RAID descriptor, just to make sure
    924 		 *  there is no stale data left in the case of a
    925 		 *  reconfiguration
    926 		 */
    927 		bzero((char *) raidPtr, sizeof(RF_Raid_t));
    928 		raidPtr->raidid = unit;
    929 
    930 		retcode = rf_Configure(raidPtr, k_cfg, NULL);
    931 
    932 		if (retcode == 0) {
    933 
    934 			/* allow this many simultaneous IO's to
    935 			   this RAID device */
    936 			raidPtr->openings = RAIDOUTSTANDING;
    937 
    938 			raidinit(raidPtr);
    939 			rf_markalldirty(raidPtr);
    940 		}
    941 		/* free the buffers.  No return code here. */
    942 		if (k_cfg->layoutSpecificSize) {
    943 			RF_Free(specific_buf, k_cfg->layoutSpecificSize);
    944 		}
    945 		RF_Free(k_cfg, sizeof(RF_Config_t));
    946 
    947 		return (retcode);
    948 
    949 		/* shutdown the system */
    950 	case RAIDFRAME_SHUTDOWN:
    951 
    952 		if ((error = raidlock(rs)) != 0)
    953 			return (error);
    954 
    955 		/*
    956 		 * If somebody has a partition mounted, we shouldn't
    957 		 * shutdown.
    958 		 */
    959 
    960 		part = DISKPART(dev);
    961 		pmask = (1 << part);
    962 		if ((rs->sc_dkdev.dk_openmask & ~pmask) ||
    963 		    ((rs->sc_dkdev.dk_bopenmask & pmask) &&
    964 			(rs->sc_dkdev.dk_copenmask & pmask))) {
    965 			raidunlock(rs);
    966 			return (EBUSY);
    967 		}
    968 
    969 		retcode = rf_Shutdown(raidPtr);
    970 
    971 		pool_destroy(&rs->sc_cbufpool);
    972 
    973 		/* It's no longer initialized... */
    974 		rs->sc_flags &= ~RAIDF_INITED;
    975 
    976 		/* Detach the disk. */
    977 		disk_detach(&rs->sc_dkdev);
    978 
    979 		raidunlock(rs);
    980 
    981 		return (retcode);
    982 	case RAIDFRAME_GET_COMPONENT_LABEL:
    983 		clabel_ptr = (RF_ComponentLabel_t **) data;
    984 		/* need to read the component label for the disk indicated
    985 		   by row,column in clabel */
    986 
    987 		/* For practice, let's get it directly fromdisk, rather
    988 		   than from the in-core copy */
    989 		RF_Malloc( clabel, sizeof( RF_ComponentLabel_t ),
    990 			   (RF_ComponentLabel_t *));
    991 		if (clabel == NULL)
    992 			return (ENOMEM);
    993 
    994 		bzero((char *) clabel, sizeof(RF_ComponentLabel_t));
    995 
    996 		retcode = copyin( *clabel_ptr, clabel,
    997 				  sizeof(RF_ComponentLabel_t));
    998 
    999 		if (retcode) {
   1000 			RF_Free( clabel, sizeof(RF_ComponentLabel_t));
   1001 			return(retcode);
   1002 		}
   1003 
   1004 		row = clabel->row;
   1005 		column = clabel->column;
   1006 
   1007 		if ((row < 0) || (row >= raidPtr->numRow) ||
   1008 		    (column < 0) || (column >= raidPtr->numCol)) {
   1009 			RF_Free( clabel, sizeof(RF_ComponentLabel_t));
   1010 			return(EINVAL);
   1011 		}
   1012 
   1013 		raidread_component_label(raidPtr->Disks[row][column].dev,
   1014 				raidPtr->raid_cinfo[row][column].ci_vp,
   1015 				clabel );
   1016 
   1017 		retcode = copyout((caddr_t) clabel,
   1018 				  (caddr_t) *clabel_ptr,
   1019 				  sizeof(RF_ComponentLabel_t));
   1020 		RF_Free( clabel, sizeof(RF_ComponentLabel_t));
   1021 		return (retcode);
   1022 
   1023 	case RAIDFRAME_SET_COMPONENT_LABEL:
   1024 		clabel = (RF_ComponentLabel_t *) data;
   1025 
   1026 		/* XXX check the label for valid stuff... */
   1027 		/* Note that some things *should not* get modified --
   1028 		   the user should be re-initing the labels instead of
   1029 		   trying to patch things.
   1030 		   */
   1031 
   1032 		printf("Got component label:\n");
   1033 		printf("Version: %d\n",clabel->version);
   1034 		printf("Serial Number: %d\n",clabel->serial_number);
   1035 		printf("Mod counter: %d\n",clabel->mod_counter);
   1036 		printf("Row: %d\n", clabel->row);
   1037 		printf("Column: %d\n", clabel->column);
   1038 		printf("Num Rows: %d\n", clabel->num_rows);
   1039 		printf("Num Columns: %d\n", clabel->num_columns);
   1040 		printf("Clean: %d\n", clabel->clean);
   1041 		printf("Status: %d\n", clabel->status);
   1042 
   1043 		row = clabel->row;
   1044 		column = clabel->column;
   1045 
   1046 		if ((row < 0) || (row >= raidPtr->numRow) ||
   1047 		    (column < 0) || (column >= raidPtr->numCol)) {
   1048 			return(EINVAL);
   1049 		}
   1050 
   1051 		/* XXX this isn't allowed to do anything for now :-) */
   1052 
   1053 		/* XXX and before it is, we need to fill in the rest
   1054 		   of the fields!?!?!?! */
   1055 #if 0
   1056 		raidwrite_component_label(
   1057                             raidPtr->Disks[row][column].dev,
   1058 			    raidPtr->raid_cinfo[row][column].ci_vp,
   1059 			    clabel );
   1060 #endif
   1061 		return (0);
   1062 
   1063 	case RAIDFRAME_INIT_LABELS:
   1064 		clabel = (RF_ComponentLabel_t *) data;
   1065 		/*
   1066 		   we only want the serial number from
   1067 		   the above.  We get all the rest of the information
   1068 		   from the config that was used to create this RAID
   1069 		   set.
   1070 		   */
   1071 
   1072 		raidPtr->serial_number = clabel->serial_number;
   1073 
   1074 		raid_init_component_label(raidPtr, &ci_label);
   1075 		ci_label.serial_number = clabel->serial_number;
   1076 
   1077 		for(row=0;row<raidPtr->numRow;row++) {
   1078 			ci_label.row = row;
   1079 			for(column=0;column<raidPtr->numCol;column++) {
   1080 				diskPtr = &raidPtr->Disks[row][column];
   1081 				ci_label.partitionSize = diskPtr->partitionSize;
   1082 				ci_label.column = column;
   1083 				raidwrite_component_label(
   1084 				  raidPtr->Disks[row][column].dev,
   1085 				  raidPtr->raid_cinfo[row][column].ci_vp,
   1086 				  &ci_label );
   1087 			}
   1088 		}
   1089 
   1090 		return (retcode);
   1091 	case RAIDFRAME_SET_AUTOCONFIG:
   1092 		d = rf_set_autoconfig(raidPtr, *data);
   1093 		printf("New autoconfig value is: %d\n", d);
   1094 		*data = d;
   1095 		return (retcode);
   1096 
   1097 	case RAIDFRAME_SET_ROOT:
   1098 		d = rf_set_rootpartition(raidPtr, *data);
   1099 		printf("New rootpartition value is: %d\n", d);
   1100 		*data = d;
   1101 		return (retcode);
   1102 
   1103 		/* initialize all parity */
   1104 	case RAIDFRAME_REWRITEPARITY:
   1105 
   1106 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1107 			/* Parity for RAID 0 is trivially correct */
   1108 			raidPtr->parity_good = RF_RAID_CLEAN;
   1109 			return(0);
   1110 		}
   1111 
   1112 		if (raidPtr->parity_rewrite_in_progress == 1) {
   1113 			/* Re-write is already in progress! */
   1114 			return(EINVAL);
   1115 		}
   1116 
   1117 		retcode = RF_CREATE_THREAD(raidPtr->parity_rewrite_thread,
   1118 					   rf_RewriteParityThread,
   1119 					   raidPtr,"raid_parity");
   1120 		return (retcode);
   1121 
   1122 
   1123 	case RAIDFRAME_ADD_HOT_SPARE:
   1124 		sparePtr = (RF_SingleComponent_t *) data;
   1125 		memcpy( &hot_spare, sparePtr, sizeof(RF_SingleComponent_t));
   1126 		printf("Adding spare\n");
   1127 		retcode = rf_add_hot_spare(raidPtr, &hot_spare);
   1128 		return(retcode);
   1129 
   1130 	case RAIDFRAME_REMOVE_HOT_SPARE:
   1131 		return(retcode);
   1132 
   1133 	case RAIDFRAME_REBUILD_IN_PLACE:
   1134 
   1135 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1136 			/* Can't do this on a RAID 0!! */
   1137 			return(EINVAL);
   1138 		}
   1139 
   1140 		if (raidPtr->recon_in_progress == 1) {
   1141 			/* a reconstruct is already in progress! */
   1142 			return(EINVAL);
   1143 		}
   1144 
   1145 		componentPtr = (RF_SingleComponent_t *) data;
   1146 		memcpy( &component, componentPtr,
   1147 			sizeof(RF_SingleComponent_t));
   1148 		row = component.row;
   1149 		column = component.column;
   1150 		printf("Rebuild: %d %d\n",row, column);
   1151 		if ((row < 0) || (row >= raidPtr->numRow) ||
   1152 		    (column < 0) || (column >= raidPtr->numCol)) {
   1153 			return(EINVAL);
   1154 		}
   1155 
   1156 		RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
   1157 		if (rrcopy == NULL)
   1158 			return(ENOMEM);
   1159 
   1160 		rrcopy->raidPtr = (void *) raidPtr;
   1161 		rrcopy->row = row;
   1162 		rrcopy->col = column;
   1163 
   1164 		retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
   1165 					   rf_ReconstructInPlaceThread,
   1166 					   rrcopy,"raid_reconip");
   1167 		return(retcode);
   1168 
   1169 	case RAIDFRAME_GET_INFO:
   1170 		if (!raidPtr->valid)
   1171 			return (ENODEV);
   1172 		ucfgp = (RF_DeviceConfig_t **) data;
   1173 		RF_Malloc(d_cfg, sizeof(RF_DeviceConfig_t),
   1174 			  (RF_DeviceConfig_t *));
   1175 		if (d_cfg == NULL)
   1176 			return (ENOMEM);
   1177 		bzero((char *) d_cfg, sizeof(RF_DeviceConfig_t));
   1178 		d_cfg->rows = raidPtr->numRow;
   1179 		d_cfg->cols = raidPtr->numCol;
   1180 		d_cfg->ndevs = raidPtr->numRow * raidPtr->numCol;
   1181 		if (d_cfg->ndevs >= RF_MAX_DISKS) {
   1182 			RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
   1183 			return (ENOMEM);
   1184 		}
   1185 		d_cfg->nspares = raidPtr->numSpare;
   1186 		if (d_cfg->nspares >= RF_MAX_DISKS) {
   1187 			RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
   1188 			return (ENOMEM);
   1189 		}
   1190 		d_cfg->maxqdepth = raidPtr->maxQueueDepth;
   1191 		d = 0;
   1192 		for (i = 0; i < d_cfg->rows; i++) {
   1193 			for (j = 0; j < d_cfg->cols; j++) {
   1194 				d_cfg->devs[d] = raidPtr->Disks[i][j];
   1195 				d++;
   1196 			}
   1197 		}
   1198 		for (j = d_cfg->cols, i = 0; i < d_cfg->nspares; i++, j++) {
   1199 			d_cfg->spares[i] = raidPtr->Disks[0][j];
   1200 		}
   1201 		retcode = copyout((caddr_t) d_cfg, (caddr_t) * ucfgp,
   1202 				  sizeof(RF_DeviceConfig_t));
   1203 		RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
   1204 
   1205 		return (retcode);
   1206 
   1207 	case RAIDFRAME_CHECK_PARITY:
   1208 		*(int *) data = raidPtr->parity_good;
   1209 		return (0);
   1210 
   1211 	case RAIDFRAME_RESET_ACCTOTALS:
   1212 		bzero(&raidPtr->acc_totals, sizeof(raidPtr->acc_totals));
   1213 		return (0);
   1214 
   1215 	case RAIDFRAME_GET_ACCTOTALS:
   1216 		totals = (RF_AccTotals_t *) data;
   1217 		*totals = raidPtr->acc_totals;
   1218 		return (0);
   1219 
   1220 	case RAIDFRAME_KEEP_ACCTOTALS:
   1221 		raidPtr->keep_acc_totals = *(int *)data;
   1222 		return (0);
   1223 
   1224 	case RAIDFRAME_GET_SIZE:
   1225 		*(int *) data = raidPtr->totalSectors;
   1226 		return (0);
   1227 
   1228 		/* fail a disk & optionally start reconstruction */
   1229 	case RAIDFRAME_FAIL_DISK:
   1230 
   1231 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1232 			/* Can't do this on a RAID 0!! */
   1233 			return(EINVAL);
   1234 		}
   1235 
   1236 		rr = (struct rf_recon_req *) data;
   1237 
   1238 		if (rr->row < 0 || rr->row >= raidPtr->numRow
   1239 		    || rr->col < 0 || rr->col >= raidPtr->numCol)
   1240 			return (EINVAL);
   1241 
   1242 		printf("raid%d: Failing the disk: row: %d col: %d\n",
   1243 		       unit, rr->row, rr->col);
   1244 
   1245 		/* make a copy of the recon request so that we don't rely on
   1246 		 * the user's buffer */
   1247 		RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
   1248 		if (rrcopy == NULL)
   1249 			return(ENOMEM);
   1250 		bcopy(rr, rrcopy, sizeof(*rr));
   1251 		rrcopy->raidPtr = (void *) raidPtr;
   1252 
   1253 		retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
   1254 					   rf_ReconThread,
   1255 					   rrcopy,"raid_recon");
   1256 		return (0);
   1257 
   1258 		/* invoke a copyback operation after recon on whatever disk
   1259 		 * needs it, if any */
   1260 	case RAIDFRAME_COPYBACK:
   1261 
   1262 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1263 			/* This makes no sense on a RAID 0!! */
   1264 			return(EINVAL);
   1265 		}
   1266 
   1267 		if (raidPtr->copyback_in_progress == 1) {
   1268 			/* Copyback is already in progress! */
   1269 			return(EINVAL);
   1270 		}
   1271 
   1272 		retcode = RF_CREATE_THREAD(raidPtr->copyback_thread,
   1273 					   rf_CopybackThread,
   1274 					   raidPtr,"raid_copyback");
   1275 		return (retcode);
   1276 
   1277 		/* return the percentage completion of reconstruction */
   1278 	case RAIDFRAME_CHECK_RECON_STATUS:
   1279 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1280 			/* This makes no sense on a RAID 0 */
   1281 			return(EINVAL);
   1282 		}
   1283 		row = 0; /* XXX we only consider a single row... */
   1284 		if (raidPtr->status[row] != rf_rs_reconstructing)
   1285 			*(int *) data = 100;
   1286 		else
   1287 			*(int *) data = raidPtr->reconControl[row]->percentComplete;
   1288 		return (0);
   1289 
   1290 	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
   1291 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1292 			/* This makes no sense on a RAID 0 */
   1293 			return(EINVAL);
   1294 		}
   1295 		if (raidPtr->parity_rewrite_in_progress == 1) {
   1296 			*(int *) data = 100 * raidPtr->parity_rewrite_stripes_done / raidPtr->Layout.numStripe;
   1297 		} else {
   1298 			*(int *) data = 100;
   1299 		}
   1300 		return (0);
   1301 
   1302 	case RAIDFRAME_CHECK_COPYBACK_STATUS:
   1303 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1304 			/* This makes no sense on a RAID 0 */
   1305 			return(EINVAL);
   1306 		}
   1307 		if (raidPtr->copyback_in_progress == 1) {
   1308 			*(int *) data = 100 * raidPtr->copyback_stripes_done /
   1309 				raidPtr->Layout.numStripe;
   1310 		} else {
   1311 			*(int *) data = 100;
   1312 		}
   1313 		return (0);
   1314 
   1315 
   1316 		/* the sparetable daemon calls this to wait for the kernel to
   1317 		 * need a spare table. this ioctl does not return until a
   1318 		 * spare table is needed. XXX -- calling mpsleep here in the
   1319 		 * ioctl code is almost certainly wrong and evil. -- XXX XXX
   1320 		 * -- I should either compute the spare table in the kernel,
   1321 		 * or have a different -- XXX XXX -- interface (a different
   1322 		 * character device) for delivering the table     -- XXX */
   1323 #if 0
   1324 	case RAIDFRAME_SPARET_WAIT:
   1325 		RF_LOCK_MUTEX(rf_sparet_wait_mutex);
   1326 		while (!rf_sparet_wait_queue)
   1327 			mpsleep(&rf_sparet_wait_queue, (PZERO + 1) | PCATCH, "sparet wait", 0, (void *) simple_lock_addr(rf_sparet_wait_mutex), MS_LOCK_SIMPLE);
   1328 		waitreq = rf_sparet_wait_queue;
   1329 		rf_sparet_wait_queue = rf_sparet_wait_queue->next;
   1330 		RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
   1331 
   1332 		/* structure assignment */
   1333 		*((RF_SparetWait_t *) data) = *waitreq;
   1334 
   1335 		RF_Free(waitreq, sizeof(*waitreq));
   1336 		return (0);
   1337 
   1338 		/* wakes up a process waiting on SPARET_WAIT and puts an error
   1339 		 * code in it that will cause the dameon to exit */
   1340 	case RAIDFRAME_ABORT_SPARET_WAIT:
   1341 		RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
   1342 		waitreq->fcol = -1;
   1343 		RF_LOCK_MUTEX(rf_sparet_wait_mutex);
   1344 		waitreq->next = rf_sparet_wait_queue;
   1345 		rf_sparet_wait_queue = waitreq;
   1346 		RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
   1347 		wakeup(&rf_sparet_wait_queue);
   1348 		return (0);
   1349 
   1350 		/* used by the spare table daemon to deliver a spare table
   1351 		 * into the kernel */
   1352 	case RAIDFRAME_SEND_SPARET:
   1353 
   1354 		/* install the spare table */
   1355 		retcode = rf_SetSpareTable(raidPtr, *(void **) data);
   1356 
   1357 		/* respond to the requestor.  the return status of the spare
   1358 		 * table installation is passed in the "fcol" field */
   1359 		RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
   1360 		waitreq->fcol = retcode;
   1361 		RF_LOCK_MUTEX(rf_sparet_wait_mutex);
   1362 		waitreq->next = rf_sparet_resp_queue;
   1363 		rf_sparet_resp_queue = waitreq;
   1364 		wakeup(&rf_sparet_resp_queue);
   1365 		RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
   1366 
   1367 		return (retcode);
   1368 #endif
   1369 
   1370 	default:
   1371 		break; /* fall through to the os-specific code below */
   1372 
   1373 	}
   1374 
   1375 	if (!raidPtr->valid)
   1376 		return (EINVAL);
   1377 
   1378 	/*
   1379 	 * Add support for "regular" device ioctls here.
   1380 	 */
   1381 
   1382 	switch (cmd) {
   1383 	case DIOCGDINFO:
   1384 		*(struct disklabel *) data = *(rs->sc_dkdev.dk_label);
   1385 		break;
   1386 
   1387 	case DIOCGPART:
   1388 		((struct partinfo *) data)->disklab = rs->sc_dkdev.dk_label;
   1389 		((struct partinfo *) data)->part =
   1390 		    &rs->sc_dkdev.dk_label->d_partitions[DISKPART(dev)];
   1391 		break;
   1392 
   1393 	case DIOCWDINFO:
   1394 	case DIOCSDINFO:
   1395 		if ((error = raidlock(rs)) != 0)
   1396 			return (error);
   1397 
   1398 		rs->sc_flags |= RAIDF_LABELLING;
   1399 
   1400 		error = setdisklabel(rs->sc_dkdev.dk_label,
   1401 		    (struct disklabel *) data, 0, rs->sc_dkdev.dk_cpulabel);
   1402 		if (error == 0) {
   1403 			if (cmd == DIOCWDINFO)
   1404 				error = writedisklabel(RAIDLABELDEV(dev),
   1405 				    raidstrategy, rs->sc_dkdev.dk_label,
   1406 				    rs->sc_dkdev.dk_cpulabel);
   1407 		}
   1408 		rs->sc_flags &= ~RAIDF_LABELLING;
   1409 
   1410 		raidunlock(rs);
   1411 
   1412 		if (error)
   1413 			return (error);
   1414 		break;
   1415 
   1416 	case DIOCWLABEL:
   1417 		if (*(int *) data != 0)
   1418 			rs->sc_flags |= RAIDF_WLABEL;
   1419 		else
   1420 			rs->sc_flags &= ~RAIDF_WLABEL;
   1421 		break;
   1422 
   1423 	case DIOCGDEFLABEL:
   1424 		raidgetdefaultlabel(raidPtr, rs,
   1425 		    (struct disklabel *) data);
   1426 		break;
   1427 
   1428 	default:
   1429 		retcode = ENOTTY;
   1430 	}
   1431 	return (retcode);
   1432 
   1433 }
   1434 
   1435 
   1436 /* raidinit -- complete the rest of the initialization for the
   1437    RAIDframe device.  */
   1438 
   1439 
   1440 static void
   1441 raidinit(raidPtr)
   1442 	RF_Raid_t *raidPtr;
   1443 {
   1444 	struct raid_softc *rs;
   1445 	int     unit;
   1446 
   1447 	unit = raidPtr->raidid;
   1448 
   1449 	rs = &raid_softc[unit];
   1450 	pool_init(&rs->sc_cbufpool, sizeof(struct raidbuf), 0,
   1451 		  0, 0, "raidpl", 0, NULL, NULL, M_RAIDFRAME);
   1452 
   1453 
   1454 	/* XXX should check return code first... */
   1455 	rs->sc_flags |= RAIDF_INITED;
   1456 
   1457 	sprintf(rs->sc_xname, "raid%d", unit);	/* XXX doesn't check bounds. */
   1458 
   1459 	rs->sc_dkdev.dk_name = rs->sc_xname;
   1460 
   1461 	/* disk_attach actually creates space for the CPU disklabel, among
   1462 	 * other things, so it's critical to call this *BEFORE* we try putzing
   1463 	 * with disklabels. */
   1464 
   1465 	disk_attach(&rs->sc_dkdev);
   1466 
   1467 	/* XXX There may be a weird interaction here between this, and
   1468 	 * protectedSectors, as used in RAIDframe.  */
   1469 
   1470 	rs->sc_size = raidPtr->totalSectors;
   1471 
   1472 }
   1473 
   1474 /* wake up the daemon & tell it to get us a spare table
   1475  * XXX
   1476  * the entries in the queues should be tagged with the raidPtr
   1477  * so that in the extremely rare case that two recons happen at once,
   1478  * we know for which device were requesting a spare table
   1479  * XXX
   1480  *
   1481  * XXX This code is not currently used. GO
   1482  */
   1483 int
   1484 rf_GetSpareTableFromDaemon(req)
   1485 	RF_SparetWait_t *req;
   1486 {
   1487 	int     retcode;
   1488 
   1489 	RF_LOCK_MUTEX(rf_sparet_wait_mutex);
   1490 	req->next = rf_sparet_wait_queue;
   1491 	rf_sparet_wait_queue = req;
   1492 	wakeup(&rf_sparet_wait_queue);
   1493 
   1494 	/* mpsleep unlocks the mutex */
   1495 	while (!rf_sparet_resp_queue) {
   1496 		tsleep(&rf_sparet_resp_queue, PRIBIO,
   1497 		    "raidframe getsparetable", 0);
   1498 	}
   1499 	req = rf_sparet_resp_queue;
   1500 	rf_sparet_resp_queue = req->next;
   1501 	RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
   1502 
   1503 	retcode = req->fcol;
   1504 	RF_Free(req, sizeof(*req));	/* this is not the same req as we
   1505 					 * alloc'd */
   1506 	return (retcode);
   1507 }
   1508 
   1509 /* a wrapper around rf_DoAccess that extracts appropriate info from the
   1510  * bp & passes it down.
   1511  * any calls originating in the kernel must use non-blocking I/O
   1512  * do some extra sanity checking to return "appropriate" error values for
   1513  * certain conditions (to make some standard utilities work)
   1514  *
   1515  * Formerly known as: rf_DoAccessKernel
   1516  */
   1517 void
   1518 raidstart(raidPtr)
   1519 	RF_Raid_t *raidPtr;
   1520 {
   1521 	RF_SectorCount_t num_blocks, pb, sum;
   1522 	RF_RaidAddr_t raid_addr;
   1523 	int     retcode;
   1524 	struct partition *pp;
   1525 	daddr_t blocknum;
   1526 	int     unit;
   1527 	struct raid_softc *rs;
   1528 	int     do_async;
   1529 	struct buf *bp;
   1530 
   1531 	unit = raidPtr->raidid;
   1532 	rs = &raid_softc[unit];
   1533 
   1534 	/* quick check to see if anything has died recently */
   1535 	RF_LOCK_MUTEX(raidPtr->mutex);
   1536 	if (raidPtr->numNewFailures > 0) {
   1537 		rf_update_component_labels(raidPtr);
   1538 		raidPtr->numNewFailures--;
   1539 	}
   1540 	RF_UNLOCK_MUTEX(raidPtr->mutex);
   1541 
   1542 	/* Check to see if we're at the limit... */
   1543 	RF_LOCK_MUTEX(raidPtr->mutex);
   1544 	while (raidPtr->openings > 0) {
   1545 		RF_UNLOCK_MUTEX(raidPtr->mutex);
   1546 
   1547 		/* get the next item, if any, from the queue */
   1548 		if ((bp = BUFQ_FIRST(&rs->buf_queue)) == NULL) {
   1549 			/* nothing more to do */
   1550 			return;
   1551 		}
   1552 		BUFQ_REMOVE(&rs->buf_queue, bp);
   1553 
   1554 		/* Ok, for the bp we have here, bp->b_blkno is relative to the
   1555 		 * partition.. Need to make it absolute to the underlying
   1556 		 * device.. */
   1557 
   1558 		blocknum = bp->b_blkno;
   1559 		if (DISKPART(bp->b_dev) != RAW_PART) {
   1560 			pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)];
   1561 			blocknum += pp->p_offset;
   1562 		}
   1563 
   1564 		db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno,
   1565 			    (int) blocknum));
   1566 
   1567 		db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
   1568 		db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));
   1569 
   1570 		/* *THIS* is where we adjust what block we're going to...
   1571 		 * but DO NOT TOUCH bp->b_blkno!!! */
   1572 		raid_addr = blocknum;
   1573 
   1574 		num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
   1575 		pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
   1576 		sum = raid_addr + num_blocks + pb;
   1577 		if (1 || rf_debugKernelAccess) {
   1578 			db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
   1579 				    (int) raid_addr, (int) sum, (int) num_blocks,
   1580 				    (int) pb, (int) bp->b_resid));
   1581 		}
   1582 		if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
   1583 		    || (sum < num_blocks) || (sum < pb)) {
   1584 			bp->b_error = ENOSPC;
   1585 			bp->b_flags |= B_ERROR;
   1586 			bp->b_resid = bp->b_bcount;
   1587 			biodone(bp);
   1588 			RF_LOCK_MUTEX(raidPtr->mutex);
   1589 			continue;
   1590 		}
   1591 		/*
   1592 		 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
   1593 		 */
   1594 
   1595 		if (bp->b_bcount & raidPtr->sectorMask) {
   1596 			bp->b_error = EINVAL;
   1597 			bp->b_flags |= B_ERROR;
   1598 			bp->b_resid = bp->b_bcount;
   1599 			biodone(bp);
   1600 			RF_LOCK_MUTEX(raidPtr->mutex);
   1601 			continue;
   1602 
   1603 		}
   1604 		db1_printf(("Calling DoAccess..\n"));
   1605 
   1606 
   1607 		RF_LOCK_MUTEX(raidPtr->mutex);
   1608 		raidPtr->openings--;
   1609 		RF_UNLOCK_MUTEX(raidPtr->mutex);
   1610 
   1611 		/*
   1612 		 * Everything is async.
   1613 		 */
   1614 		do_async = 1;
   1615 
   1616 		/* don't ever condition on bp->b_flags & B_WRITE.
   1617 		 * always condition on B_READ instead */
   1618 
   1619 		/* XXX we're still at splbio() here... do we *really*
   1620 		   need to be? */
   1621 
   1622 
   1623 		retcode = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
   1624 				      RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
   1625 				      do_async, raid_addr, num_blocks,
   1626 				      bp->b_un.b_addr, bp, NULL, NULL,
   1627 				      RF_DAG_NONBLOCKING_IO, NULL, NULL, NULL);
   1628 
   1629 
   1630 		RF_LOCK_MUTEX(raidPtr->mutex);
   1631 	}
   1632 	RF_UNLOCK_MUTEX(raidPtr->mutex);
   1633 }
   1634 
   1635 
   1636 
   1637 
   1638 /* invoke an I/O from kernel mode.  Disk queue should be locked upon entry */
   1639 
   1640 int
   1641 rf_DispatchKernelIO(queue, req)
   1642 	RF_DiskQueue_t *queue;
   1643 	RF_DiskQueueData_t *req;
   1644 {
   1645 	int     op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
   1646 	struct buf *bp;
   1647 	struct raidbuf *raidbp = NULL;
   1648 	struct raid_softc *rs;
   1649 	int     unit;
   1650 	int s;
   1651 
   1652 	s=0;
   1653 	/* s = splbio();*/ /* want to test this */
   1654 	/* XXX along with the vnode, we also need the softc associated with
   1655 	 * this device.. */
   1656 
   1657 	req->queue = queue;
   1658 
   1659 	unit = queue->raidPtr->raidid;
   1660 
   1661 	db1_printf(("DispatchKernelIO unit: %d\n", unit));
   1662 
   1663 	if (unit >= numraid) {
   1664 		printf("Invalid unit number: %d %d\n", unit, numraid);
   1665 		panic("Invalid Unit number in rf_DispatchKernelIO\n");
   1666 	}
   1667 	rs = &raid_softc[unit];
   1668 
   1669 	/* XXX is this the right place? */
   1670 	disk_busy(&rs->sc_dkdev);
   1671 
   1672 	bp = req->bp;
   1673 #if 1
   1674 	/* XXX when there is a physical disk failure, someone is passing us a
   1675 	 * buffer that contains old stuff!!  Attempt to deal with this problem
   1676 	 * without taking a performance hit... (not sure where the real bug
   1677 	 * is.  It's buried in RAIDframe somewhere) :-(  GO ) */
   1678 
   1679 	if (bp->b_flags & B_ERROR) {
   1680 		bp->b_flags &= ~B_ERROR;
   1681 	}
   1682 	if (bp->b_error != 0) {
   1683 		bp->b_error = 0;
   1684 	}
   1685 #endif
   1686 	raidbp = RAIDGETBUF(rs);
   1687 
   1688 	raidbp->rf_flags = 0;	/* XXX not really used anywhere... */
   1689 
   1690 	/*
   1691 	 * context for raidiodone
   1692 	 */
   1693 	raidbp->rf_obp = bp;
   1694 	raidbp->req = req;
   1695 
   1696 	LIST_INIT(&raidbp->rf_buf.b_dep);
   1697 
   1698 	switch (req->type) {
   1699 	case RF_IO_TYPE_NOP:	/* used primarily to unlock a locked queue */
   1700 		/* XXX need to do something extra here.. */
   1701 		/* I'm leaving this in, as I've never actually seen it used,
   1702 		 * and I'd like folks to report it... GO */
   1703 		printf(("WAKEUP CALLED\n"));
   1704 		queue->numOutstanding++;
   1705 
   1706 		/* XXX need to glue the original buffer into this??  */
   1707 
   1708 		KernelWakeupFunc(&raidbp->rf_buf);
   1709 		break;
   1710 
   1711 	case RF_IO_TYPE_READ:
   1712 	case RF_IO_TYPE_WRITE:
   1713 
   1714 		if (req->tracerec) {
   1715 			RF_ETIMER_START(req->tracerec->timer);
   1716 		}
   1717 		InitBP(&raidbp->rf_buf, queue->rf_cinfo->ci_vp,
   1718 		    op | bp->b_flags, queue->rf_cinfo->ci_dev,
   1719 		    req->sectorOffset, req->numSector,
   1720 		    req->buf, KernelWakeupFunc, (void *) req,
   1721 		    queue->raidPtr->logBytesPerSector, req->b_proc);
   1722 
   1723 		if (rf_debugKernelAccess) {
   1724 			db1_printf(("dispatch: bp->b_blkno = %ld\n",
   1725 				(long) bp->b_blkno));
   1726 		}
   1727 		queue->numOutstanding++;
   1728 		queue->last_deq_sector = req->sectorOffset;
   1729 		/* acc wouldn't have been let in if there were any pending
   1730 		 * reqs at any other priority */
   1731 		queue->curPriority = req->priority;
   1732 
   1733 		db1_printf(("Going for %c to unit %d row %d col %d\n",
   1734 			req->type, unit, queue->row, queue->col));
   1735 		db1_printf(("sector %d count %d (%d bytes) %d\n",
   1736 			(int) req->sectorOffset, (int) req->numSector,
   1737 			(int) (req->numSector <<
   1738 			    queue->raidPtr->logBytesPerSector),
   1739 			(int) queue->raidPtr->logBytesPerSector));
   1740 		if ((raidbp->rf_buf.b_flags & B_READ) == 0) {
   1741 			raidbp->rf_buf.b_vp->v_numoutput++;
   1742 		}
   1743 		VOP_STRATEGY(&raidbp->rf_buf);
   1744 
   1745 		break;
   1746 
   1747 	default:
   1748 		panic("bad req->type in rf_DispatchKernelIO");
   1749 	}
   1750 	db1_printf(("Exiting from DispatchKernelIO\n"));
   1751 	/* splx(s); */ /* want to test this */
   1752 	return (0);
   1753 }
   1754 /* this is the callback function associated with a I/O invoked from
   1755    kernel code.
   1756  */
   1757 static void
   1758 KernelWakeupFunc(vbp)
   1759 	struct buf *vbp;
   1760 {
   1761 	RF_DiskQueueData_t *req = NULL;
   1762 	RF_DiskQueue_t *queue;
   1763 	struct raidbuf *raidbp = (struct raidbuf *) vbp;
   1764 	struct buf *bp;
   1765 	struct raid_softc *rs;
   1766 	int     unit;
   1767 	register int s;
   1768 
   1769 	s = splbio();
   1770 	db1_printf(("recovering the request queue:\n"));
   1771 	req = raidbp->req;
   1772 
   1773 	bp = raidbp->rf_obp;
   1774 
   1775 	queue = (RF_DiskQueue_t *) req->queue;
   1776 
   1777 	if (raidbp->rf_buf.b_flags & B_ERROR) {
   1778 		bp->b_flags |= B_ERROR;
   1779 		bp->b_error = raidbp->rf_buf.b_error ?
   1780 		    raidbp->rf_buf.b_error : EIO;
   1781 	}
   1782 
   1783 	/* XXX methinks this could be wrong... */
   1784 #if 1
   1785 	bp->b_resid = raidbp->rf_buf.b_resid;
   1786 #endif
   1787 
   1788 	if (req->tracerec) {
   1789 		RF_ETIMER_STOP(req->tracerec->timer);
   1790 		RF_ETIMER_EVAL(req->tracerec->timer);
   1791 		RF_LOCK_MUTEX(rf_tracing_mutex);
   1792 		req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
   1793 		req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
   1794 		req->tracerec->num_phys_ios++;
   1795 		RF_UNLOCK_MUTEX(rf_tracing_mutex);
   1796 	}
   1797 	bp->b_bcount = raidbp->rf_buf.b_bcount;	/* XXXX ?? */
   1798 
   1799 	unit = queue->raidPtr->raidid;	/* *Much* simpler :-> */
   1800 
   1801 
   1802 	/* XXX Ok, let's get aggressive... If B_ERROR is set, let's go
   1803 	 * ballistic, and mark the component as hosed... */
   1804 
   1805 	if (bp->b_flags & B_ERROR) {
   1806 		/* Mark the disk as dead */
   1807 		/* but only mark it once... */
   1808 		if (queue->raidPtr->Disks[queue->row][queue->col].status ==
   1809 		    rf_ds_optimal) {
   1810 			printf("raid%d: IO Error.  Marking %s as failed.\n",
   1811 			    unit, queue->raidPtr->Disks[queue->row][queue->col].devname);
   1812 			queue->raidPtr->Disks[queue->row][queue->col].status =
   1813 			    rf_ds_failed;
   1814 			queue->raidPtr->status[queue->row] = rf_rs_degraded;
   1815 			queue->raidPtr->numFailures++;
   1816 			queue->raidPtr->numNewFailures++;
   1817 			/* XXX here we should bump the version number for each component, and write that data out */
   1818 		} else {	/* Disk is already dead... */
   1819 			/* printf("Disk already marked as dead!\n"); */
   1820 		}
   1821 
   1822 	}
   1823 
   1824 	rs = &raid_softc[unit];
   1825 	RAIDPUTBUF(rs, raidbp);
   1826 
   1827 
   1828 	if (bp->b_resid == 0) {
   1829 		/* XXX is this the right place for a disk_unbusy()??!??!?!? */
   1830 		disk_unbusy(&rs->sc_dkdev, (bp->b_bcount - bp->b_resid));
   1831 	}
   1832 
   1833 	rf_DiskIOComplete(queue, req, (bp->b_flags & B_ERROR) ? 1 : 0);
   1834 	(req->CompleteFunc) (req->argument, (bp->b_flags & B_ERROR) ? 1 : 0);
   1835 
   1836 	splx(s);
   1837 }
   1838 
   1839 
   1840 
   1841 /*
   1842  * initialize a buf structure for doing an I/O in the kernel.
   1843  */
   1844 static void
   1845 InitBP(
   1846     struct buf * bp,
   1847     struct vnode * b_vp,
   1848     unsigned rw_flag,
   1849     dev_t dev,
   1850     RF_SectorNum_t startSect,
   1851     RF_SectorCount_t numSect,
   1852     caddr_t buf,
   1853     void (*cbFunc) (struct buf *),
   1854     void *cbArg,
   1855     int logBytesPerSector,
   1856     struct proc * b_proc)
   1857 {
   1858 	/* bp->b_flags       = B_PHYS | rw_flag; */
   1859 	bp->b_flags = B_CALL | rw_flag;	/* XXX need B_PHYS here too??? */
   1860 	bp->b_bcount = numSect << logBytesPerSector;
   1861 	bp->b_bufsize = bp->b_bcount;
   1862 	bp->b_error = 0;
   1863 	bp->b_dev = dev;
   1864 	bp->b_un.b_addr = buf;
   1865 	bp->b_blkno = startSect;
   1866 	bp->b_resid = bp->b_bcount;	/* XXX is this right!??!?!! */
   1867 	if (bp->b_bcount == 0) {
   1868 		panic("bp->b_bcount is zero in InitBP!!\n");
   1869 	}
   1870 	bp->b_proc = b_proc;
   1871 	bp->b_iodone = cbFunc;
   1872 	bp->b_vp = b_vp;
   1873 
   1874 }
   1875 
   1876 static void
   1877 raidgetdefaultlabel(raidPtr, rs, lp)
   1878 	RF_Raid_t *raidPtr;
   1879 	struct raid_softc *rs;
   1880 	struct disklabel *lp;
   1881 {
   1882 	db1_printf(("Building a default label...\n"));
   1883 	bzero(lp, sizeof(*lp));
   1884 
   1885 	/* fabricate a label... */
   1886 	lp->d_secperunit = raidPtr->totalSectors;
   1887 	lp->d_secsize = raidPtr->bytesPerSector;
   1888 	lp->d_nsectors = raidPtr->Layout.dataSectorsPerStripe;
   1889 	lp->d_ntracks = 1;
   1890 	lp->d_ncylinders = raidPtr->totalSectors /
   1891 		(lp->d_nsectors * lp->d_ntracks);
   1892 	lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
   1893 
   1894 	strncpy(lp->d_typename, "raid", sizeof(lp->d_typename));
   1895 	lp->d_type = DTYPE_RAID;
   1896 	strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
   1897 	lp->d_rpm = 3600;
   1898 	lp->d_interleave = 1;
   1899 	lp->d_flags = 0;
   1900 
   1901 	lp->d_partitions[RAW_PART].p_offset = 0;
   1902 	lp->d_partitions[RAW_PART].p_size = raidPtr->totalSectors;
   1903 	lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
   1904 	lp->d_npartitions = RAW_PART + 1;
   1905 
   1906 	lp->d_magic = DISKMAGIC;
   1907 	lp->d_magic2 = DISKMAGIC;
   1908 	lp->d_checksum = dkcksum(rs->sc_dkdev.dk_label);
   1909 
   1910 }
   1911 /*
   1912  * Read the disklabel from the raid device.  If one is not present, fake one
   1913  * up.
   1914  */
   1915 static void
   1916 raidgetdisklabel(dev)
   1917 	dev_t   dev;
   1918 {
   1919 	int     unit = raidunit(dev);
   1920 	struct raid_softc *rs = &raid_softc[unit];
   1921 	char   *errstring;
   1922 	struct disklabel *lp = rs->sc_dkdev.dk_label;
   1923 	struct cpu_disklabel *clp = rs->sc_dkdev.dk_cpulabel;
   1924 	RF_Raid_t *raidPtr;
   1925 
   1926 	db1_printf(("Getting the disklabel...\n"));
   1927 
   1928 	bzero(clp, sizeof(*clp));
   1929 
   1930 	raidPtr = raidPtrs[unit];
   1931 
   1932 	raidgetdefaultlabel(raidPtr, rs, lp);
   1933 
   1934 	/*
   1935 	 * Call the generic disklabel extraction routine.
   1936 	 */
   1937 	errstring = readdisklabel(RAIDLABELDEV(dev), raidstrategy,
   1938 	    rs->sc_dkdev.dk_label, rs->sc_dkdev.dk_cpulabel);
   1939 	if (errstring)
   1940 		raidmakedisklabel(rs);
   1941 	else {
   1942 		int     i;
   1943 		struct partition *pp;
   1944 
   1945 		/*
   1946 		 * Sanity check whether the found disklabel is valid.
   1947 		 *
   1948 		 * This is necessary since total size of the raid device
   1949 		 * may vary when an interleave is changed even though exactly
   1950 		 * same componets are used, and old disklabel may used
   1951 		 * if that is found.
   1952 		 */
   1953 		if (lp->d_secperunit != rs->sc_size)
   1954 			printf("WARNING: %s: "
   1955 			    "total sector size in disklabel (%d) != "
   1956 			    "the size of raid (%ld)\n", rs->sc_xname,
   1957 			    lp->d_secperunit, (long) rs->sc_size);
   1958 		for (i = 0; i < lp->d_npartitions; i++) {
   1959 			pp = &lp->d_partitions[i];
   1960 			if (pp->p_offset + pp->p_size > rs->sc_size)
   1961 				printf("WARNING: %s: end of partition `%c' "
   1962 				    "exceeds the size of raid (%ld)\n",
   1963 				    rs->sc_xname, 'a' + i, (long) rs->sc_size);
   1964 		}
   1965 	}
   1966 
   1967 }
   1968 /*
   1969  * Take care of things one might want to take care of in the event
   1970  * that a disklabel isn't present.
   1971  */
   1972 static void
   1973 raidmakedisklabel(rs)
   1974 	struct raid_softc *rs;
   1975 {
   1976 	struct disklabel *lp = rs->sc_dkdev.dk_label;
   1977 	db1_printf(("Making a label..\n"));
   1978 
   1979 	/*
   1980 	 * For historical reasons, if there's no disklabel present
   1981 	 * the raw partition must be marked FS_BSDFFS.
   1982 	 */
   1983 
   1984 	lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;
   1985 
   1986 	strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));
   1987 
   1988 	lp->d_checksum = dkcksum(lp);
   1989 }
   1990 /*
   1991  * Lookup the provided name in the filesystem.  If the file exists,
   1992  * is a valid block device, and isn't being used by anyone else,
   1993  * set *vpp to the file's vnode.
   1994  * You'll find the original of this in ccd.c
   1995  */
   1996 int
   1997 raidlookup(path, p, vpp)
   1998 	char   *path;
   1999 	struct proc *p;
   2000 	struct vnode **vpp;	/* result */
   2001 {
   2002 	struct nameidata nd;
   2003 	struct vnode *vp;
   2004 	struct vattr va;
   2005 	int     error;
   2006 
   2007 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, path, p);
   2008 	if ((error = vn_open(&nd, FREAD | FWRITE, 0)) != 0) {
   2009 #ifdef DEBUG
   2010 		printf("RAIDframe: vn_open returned %d\n", error);
   2011 #endif
   2012 		return (error);
   2013 	}
   2014 	vp = nd.ni_vp;
   2015 	if (vp->v_usecount > 1) {
   2016 		VOP_UNLOCK(vp, 0);
   2017 		(void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
   2018 		return (EBUSY);
   2019 	}
   2020 	if ((error = VOP_GETATTR(vp, &va, p->p_ucred, p)) != 0) {
   2021 		VOP_UNLOCK(vp, 0);
   2022 		(void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
   2023 		return (error);
   2024 	}
   2025 	/* XXX: eventually we should handle VREG, too. */
   2026 	if (va.va_type != VBLK) {
   2027 		VOP_UNLOCK(vp, 0);
   2028 		(void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
   2029 		return (ENOTBLK);
   2030 	}
   2031 	VOP_UNLOCK(vp, 0);
   2032 	*vpp = vp;
   2033 	return (0);
   2034 }
   2035 /*
   2036  * Wait interruptibly for an exclusive lock.
   2037  *
   2038  * XXX
   2039  * Several drivers do this; it should be abstracted and made MP-safe.
   2040  * (Hmm... where have we seen this warning before :->  GO )
   2041  */
   2042 static int
   2043 raidlock(rs)
   2044 	struct raid_softc *rs;
   2045 {
   2046 	int     error;
   2047 
   2048 	while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
   2049 		rs->sc_flags |= RAIDF_WANTED;
   2050 		if ((error =
   2051 			tsleep(rs, PRIBIO | PCATCH, "raidlck", 0)) != 0)
   2052 			return (error);
   2053 	}
   2054 	rs->sc_flags |= RAIDF_LOCKED;
   2055 	return (0);
   2056 }
   2057 /*
   2058  * Unlock and wake up any waiters.
   2059  */
   2060 static void
   2061 raidunlock(rs)
   2062 	struct raid_softc *rs;
   2063 {
   2064 
   2065 	rs->sc_flags &= ~RAIDF_LOCKED;
   2066 	if ((rs->sc_flags & RAIDF_WANTED) != 0) {
   2067 		rs->sc_flags &= ~RAIDF_WANTED;
   2068 		wakeup(rs);
   2069 	}
   2070 }
   2071 
   2072 
/*
 * Location and size, in bytes, of the component label area that
 * raidread_component_label() and raidwrite_component_label() transfer.
 */
#define RF_COMPONENT_INFO_OFFSET  (16 * 1024)	/* bytes */
#define RF_COMPONENT_INFO_SIZE    (1024)	/* bytes */
   2075 
   2076 int
   2077 raidmarkclean(dev_t dev, struct vnode *b_vp, int mod_counter)
   2078 {
   2079 	RF_ComponentLabel_t clabel;
   2080 	raidread_component_label(dev, b_vp, &clabel);
   2081 	clabel.mod_counter = mod_counter;
   2082 	clabel.clean = RF_RAID_CLEAN;
   2083 	raidwrite_component_label(dev, b_vp, &clabel);
   2084 	return(0);
   2085 }
   2086 
   2087 
   2088 int
   2089 raidmarkdirty(dev_t dev, struct vnode *b_vp, int mod_counter)
   2090 {
   2091 	RF_ComponentLabel_t clabel;
   2092 	raidread_component_label(dev, b_vp, &clabel);
   2093 	clabel.mod_counter = mod_counter;
   2094 	clabel.clean = RF_RAID_DIRTY;
   2095 	raidwrite_component_label(dev, b_vp, &clabel);
   2096 	return(0);
   2097 }
   2098 
   2099 /* ARGSUSED */
   2100 int
   2101 raidread_component_label(dev, b_vp, clabel)
   2102 	dev_t dev;
   2103 	struct vnode *b_vp;
   2104 	RF_ComponentLabel_t *clabel;
   2105 {
   2106 	struct buf *bp;
   2107 	int error;
   2108 
   2109 	/* XXX should probably ensure that we don't try to do this if
   2110 	   someone has changed rf_protected_sectors. */
   2111 
   2112 	/* get a block of the appropriate size... */
   2113 	bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
   2114 	bp->b_dev = dev;
   2115 
   2116 	/* get our ducks in a row for the read */
   2117 	bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
   2118 	bp->b_bcount = RF_COMPONENT_INFO_SIZE;
   2119 	bp->b_flags = B_BUSY | B_READ;
   2120  	bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
   2121 
   2122 	(*bdevsw[major(bp->b_dev)].d_strategy)(bp);
   2123 
   2124 	error = biowait(bp);
   2125 
   2126 	if (!error) {
   2127 		memcpy(clabel, bp->b_un.b_addr,
   2128 		       sizeof(RF_ComponentLabel_t));
   2129 #if 0
   2130 		print_component_label( clabel );
   2131 #endif
   2132         } else {
   2133 #if 0
   2134 		printf("Failed to read RAID component label!\n");
   2135 #endif
   2136 	}
   2137 
   2138         bp->b_flags = B_INVAL | B_AGE;
   2139 	brelse(bp);
   2140 	return(error);
   2141 }
   2142 /* ARGSUSED */
   2143 int
   2144 raidwrite_component_label(dev, b_vp, clabel)
   2145 	dev_t dev;
   2146 	struct vnode *b_vp;
   2147 	RF_ComponentLabel_t *clabel;
   2148 {
   2149 	struct buf *bp;
   2150 	int error;
   2151 
   2152 	/* get a block of the appropriate size... */
   2153 	bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
   2154 	bp->b_dev = dev;
   2155 
   2156 	/* get our ducks in a row for the write */
   2157 	bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
   2158 	bp->b_bcount = RF_COMPONENT_INFO_SIZE;
   2159 	bp->b_flags = B_BUSY | B_WRITE;
   2160  	bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
   2161 
   2162 	memset( bp->b_un.b_addr, 0, RF_COMPONENT_INFO_SIZE );
   2163 
   2164 	memcpy( bp->b_un.b_addr, clabel, sizeof(RF_ComponentLabel_t));
   2165 
   2166 	(*bdevsw[major(bp->b_dev)].d_strategy)(bp);
   2167 	error = biowait(bp);
   2168         bp->b_flags = B_INVAL | B_AGE;
   2169 	brelse(bp);
   2170 	if (error) {
   2171 #if 1
   2172 		printf("Failed to write RAID component info!\n");
   2173 #endif
   2174 	}
   2175 
   2176 	return(error);
   2177 }
   2178 
   2179 void
   2180 rf_markalldirty( raidPtr )
   2181 	RF_Raid_t *raidPtr;
   2182 {
   2183 	RF_ComponentLabel_t clabel;
   2184 	int r,c;
   2185 
   2186 	raidPtr->mod_counter++;
   2187 	for (r = 0; r < raidPtr->numRow; r++) {
   2188 		for (c = 0; c < raidPtr->numCol; c++) {
   2189 			if (raidPtr->Disks[r][c].status != rf_ds_failed) {
   2190 				raidread_component_label(
   2191 					raidPtr->Disks[r][c].dev,
   2192 					raidPtr->raid_cinfo[r][c].ci_vp,
   2193 					&clabel);
   2194 				if (clabel.status == rf_ds_spared) {
   2195 					/* XXX do something special...
   2196 					 but whatever you do, don't
   2197 					 try to access it!! */
   2198 				} else {
   2199 #if 0
   2200 				clabel.status =
   2201 					raidPtr->Disks[r][c].status;
   2202 				raidwrite_component_label(
   2203 					raidPtr->Disks[r][c].dev,
   2204 					raidPtr->raid_cinfo[r][c].ci_vp,
   2205 					&clabel);
   2206 #endif
   2207 				raidmarkdirty(
   2208 				       raidPtr->Disks[r][c].dev,
   2209 				       raidPtr->raid_cinfo[r][c].ci_vp,
   2210 				       raidPtr->mod_counter);
   2211 				}
   2212 			}
   2213 		}
   2214 	}
   2215 	/* printf("Component labels marked dirty.\n"); */
   2216 #if 0
   2217 	for( c = 0; c < raidPtr->numSpare ; c++) {
   2218 		sparecol = raidPtr->numCol + c;
   2219 		if (raidPtr->Disks[r][sparecol].status == rf_ds_used_spare) {
   2220 			/*
   2221 
   2222 			   XXX this is where we get fancy and map this spare
   2223 			   into it's correct spot in the array.
   2224 
   2225 			 */
   2226 			/*
   2227 
   2228 			   we claim this disk is "optimal" if it's
   2229 			   rf_ds_used_spare, as that means it should be
   2230 			   directly substitutable for the disk it replaced.
   2231 			   We note that too...
   2232 
   2233 			 */
   2234 
   2235 			for(i=0;i<raidPtr->numRow;i++) {
   2236 				for(j=0;j<raidPtr->numCol;j++) {
   2237 					if ((raidPtr->Disks[i][j].spareRow ==
   2238 					     r) &&
   2239 					    (raidPtr->Disks[i][j].spareCol ==
   2240 					     sparecol)) {
   2241 						srow = r;
   2242 						scol = sparecol;
   2243 						break;
   2244 					}
   2245 				}
   2246 			}
   2247 
   2248 			raidread_component_label(
   2249 				      raidPtr->Disks[r][sparecol].dev,
   2250 				      raidPtr->raid_cinfo[r][sparecol].ci_vp,
   2251 				      &clabel);
   2252 			/* make sure status is noted */
   2253 			clabel.version = RF_COMPONENT_LABEL_VERSION;
   2254 			clabel.mod_counter = raidPtr->mod_counter;
   2255 			clabel.serial_number = raidPtr->serial_number;
   2256 			clabel.row = srow;
   2257 			clabel.column = scol;
   2258 			clabel.num_rows = raidPtr->numRow;
   2259 			clabel.num_columns = raidPtr->numCol;
   2260 			clabel.clean = RF_RAID_DIRTY; /* changed in a bit*/
   2261 			clabel.status = rf_ds_optimal;
   2262 			raidwrite_component_label(
   2263 				      raidPtr->Disks[r][sparecol].dev,
   2264 				      raidPtr->raid_cinfo[r][sparecol].ci_vp,
   2265 				      &clabel);
   2266 			raidmarkclean( raidPtr->Disks[r][sparecol].dev,
   2267 			              raidPtr->raid_cinfo[r][sparecol].ci_vp);
   2268 		}
   2269 	}
   2270 
   2271 #endif
   2272 }
   2273 
   2274 
   2275 void
   2276 rf_update_component_labels( raidPtr )
   2277 	RF_Raid_t *raidPtr;
   2278 {
   2279 	RF_ComponentLabel_t clabel;
   2280 	int sparecol;
   2281 	int r,c;
   2282 	int i,j;
   2283 	int srow, scol;
   2284 
   2285 	srow = -1;
   2286 	scol = -1;
   2287 
   2288 	/* XXX should do extra checks to make sure things really are clean,
   2289 	   rather than blindly setting the clean bit... */
   2290 
   2291 	raidPtr->mod_counter++;
   2292 
   2293 	for (r = 0; r < raidPtr->numRow; r++) {
   2294 		for (c = 0; c < raidPtr->numCol; c++) {
   2295 			if (raidPtr->Disks[r][c].status == rf_ds_optimal) {
   2296 				raidread_component_label(
   2297 					raidPtr->Disks[r][c].dev,
   2298 					raidPtr->raid_cinfo[r][c].ci_vp,
   2299 					&clabel);
   2300 				/* make sure status is noted */
   2301 				clabel.status = rf_ds_optimal;
   2302 				/* bump the counter */
   2303 				clabel.mod_counter++;
   2304 #if 0
   2305 				/* note where this set was configured last */
   2306 				clabel.last_unit = raidPtr->raidid;
   2307 #endif
   2308 #if DEBUG
   2309 				if (raidPtr->mod_counter !=
   2310 				    clabel.mod_counter) {
   2311 					printf("raid%d: mod_counter for row: %d col: %d not in sync\n", raidPtr->raidid, r, c);
   2312 				}
   2313 #endif
   2314 
   2315 				raidwrite_component_label(
   2316 					raidPtr->Disks[r][c].dev,
   2317 					raidPtr->raid_cinfo[r][c].ci_vp,
   2318 					&clabel);
   2319 				if (raidPtr->parity_good == RF_RAID_CLEAN) {
   2320 					raidmarkclean(
   2321 					      raidPtr->Disks[r][c].dev,
   2322 					      raidPtr->raid_cinfo[r][c].ci_vp,
   2323 					      raidPtr->mod_counter);
   2324 				}
   2325 			}
   2326 			/* else we don't touch it.. */
   2327 #if 0
   2328 			else if (raidPtr->Disks[r][c].status !=
   2329 				   rf_ds_failed) {
   2330 				raidread_component_label(
   2331 					raidPtr->Disks[r][c].dev,
   2332 					raidPtr->raid_cinfo[r][c].ci_vp,
   2333 					&clabel);
   2334 				/* make sure status is noted */
   2335 				clabel.status =
   2336 					raidPtr->Disks[r][c].status;
   2337 				raidwrite_component_label(
   2338 					raidPtr->Disks[r][c].dev,
   2339 					raidPtr->raid_cinfo[r][c].ci_vp,
   2340 					&clabel);
   2341 				if (raidPtr->parity_good == RF_RAID_CLEAN) {
   2342 					raidmarkclean(
   2343 					      raidPtr->Disks[r][c].dev,
   2344 					      raidPtr->raid_cinfo[r][c].ci_vp,
   2345 					      raidPtr->mod_counter);
   2346 				}
   2347 			}
   2348 #endif
   2349 		}
   2350 	}
   2351 
   2352 	for( c = 0; c < raidPtr->numSpare ; c++) {
   2353 		sparecol = raidPtr->numCol + c;
   2354 		if (raidPtr->Disks[0][sparecol].status == rf_ds_used_spare) {
   2355 			/*
   2356 
   2357 			   we claim this disk is "optimal" if it's
   2358 			   rf_ds_used_spare, as that means it should be
   2359 			   directly substitutable for the disk it replaced.
   2360 			   We note that too...
   2361 
   2362 			 */
   2363 
   2364 			for(i=0;i<raidPtr->numRow;i++) {
   2365 				for(j=0;j<raidPtr->numCol;j++) {
   2366 					if ((raidPtr->Disks[i][j].spareRow ==
   2367 					     0) &&
   2368 					    (raidPtr->Disks[i][j].spareCol ==
   2369 					     sparecol)) {
   2370 						srow = i;
   2371 						scol = j;
   2372 						break;
   2373 					}
   2374 				}
   2375 			}
   2376 
   2377 			/* XXX shouldn't *really* need this... */
   2378 			raidread_component_label(
   2379 				      raidPtr->Disks[0][sparecol].dev,
   2380 				      raidPtr->raid_cinfo[0][sparecol].ci_vp,
   2381 				      &clabel);
   2382 			/* make sure status is noted */
   2383 
   2384 			raid_init_component_label(raidPtr, &clabel);
   2385 
   2386 			clabel.mod_counter = raidPtr->mod_counter;
   2387 			clabel.row = srow;
   2388 			clabel.column = scol;
   2389 			clabel.status = rf_ds_optimal;
   2390 
   2391 			raidwrite_component_label(
   2392 				      raidPtr->Disks[0][sparecol].dev,
   2393 				      raidPtr->raid_cinfo[0][sparecol].ci_vp,
   2394 				      &clabel);
   2395 			if (raidPtr->parity_good == RF_RAID_CLEAN) {
   2396 				raidmarkclean( raidPtr->Disks[0][sparecol].dev,
   2397 			              raidPtr->raid_cinfo[0][sparecol].ci_vp,
   2398 					       raidPtr->mod_counter);
   2399 			}
   2400 		}
   2401 	}
   2402 	/* 	printf("Component labels updated\n"); */
   2403 }
   2404 
   2405 void
   2406 rf_ReconThread(req)
   2407 	struct rf_recon_req *req;
   2408 {
   2409 	int     s;
   2410 	RF_Raid_t *raidPtr;
   2411 
   2412 	s = splbio();
   2413 	raidPtr = (RF_Raid_t *) req->raidPtr;
   2414 	raidPtr->recon_in_progress = 1;
   2415 
   2416 	rf_FailDisk((RF_Raid_t *) req->raidPtr, req->row, req->col,
   2417 		    ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));
   2418 
   2419 	/* XXX get rid of this! we don't need it at all.. */
   2420 	RF_Free(req, sizeof(*req));
   2421 
   2422 	raidPtr->recon_in_progress = 0;
   2423 	splx(s);
   2424 
   2425 	/* That's all... */
   2426 	kthread_exit(0);        /* does not return */
   2427 }
   2428 
   2429 void
   2430 rf_RewriteParityThread(raidPtr)
   2431 	RF_Raid_t *raidPtr;
   2432 {
   2433 	int retcode;
   2434 	int s;
   2435 
   2436 	raidPtr->parity_rewrite_in_progress = 1;
   2437 	s = splbio();
   2438 	retcode = rf_RewriteParity(raidPtr);
   2439 	splx(s);
   2440 	if (retcode) {
   2441 		printf("raid%d: Error re-writing parity!\n",raidPtr->raidid);
   2442 	} else {
   2443 		/* set the clean bit!  If we shutdown correctly,
   2444 		   the clean bit on each component label will get
   2445 		   set */
   2446 		raidPtr->parity_good = RF_RAID_CLEAN;
   2447 	}
   2448 	raidPtr->parity_rewrite_in_progress = 0;
   2449 
   2450 	/* That's all... */
   2451 	kthread_exit(0);        /* does not return */
   2452 }
   2453 
   2454 
   2455 void
   2456 rf_CopybackThread(raidPtr)
   2457 	RF_Raid_t *raidPtr;
   2458 {
   2459 	int s;
   2460 
   2461 	raidPtr->copyback_in_progress = 1;
   2462 	s = splbio();
   2463 	rf_CopybackReconstructedData(raidPtr);
   2464 	splx(s);
   2465 	raidPtr->copyback_in_progress = 0;
   2466 
   2467 	/* That's all... */
   2468 	kthread_exit(0);        /* does not return */
   2469 }
   2470 
   2471 
   2472 void
   2473 rf_ReconstructInPlaceThread(req)
   2474 	struct rf_recon_req *req;
   2475 {
   2476 	int retcode;
   2477 	int s;
   2478 	RF_Raid_t *raidPtr;
   2479 
   2480 	s = splbio();
   2481 	raidPtr = req->raidPtr;
   2482 	raidPtr->recon_in_progress = 1;
   2483 	retcode = rf_ReconstructInPlace(raidPtr, req->row, req->col);
   2484 	RF_Free(req, sizeof(*req));
   2485 	raidPtr->recon_in_progress = 0;
   2486 	splx(s);
   2487 
   2488 	/* That's all... */
   2489 	kthread_exit(0);        /* does not return */
   2490 }
   2491 
/*
 * Mount-root hook.  Intentionally does nothing; dev is ignored.
 */
void
rf_mountroot_hook(struct device *dev)
{
	/* nothing to do */
}
   2498 
   2499 
   2500 RF_AutoConfig_t *
   2501 rf_find_raid_components()
   2502 {
   2503 	struct devnametobdevmaj *dtobdm;
   2504 	struct vnode *vp;
   2505 	struct disklabel label;
   2506 	struct device *dv;
   2507 	char *cd_name;
   2508 	dev_t dev;
   2509 	int error;
   2510 	int i;
   2511 	int good_one;
   2512 	RF_ComponentLabel_t *clabel;
   2513 	RF_AutoConfig_t *ac_list;
   2514 	RF_AutoConfig_t *ac;
   2515 
   2516 
   2517 	/* initialize the AutoConfig list */
   2518 	ac_list = NULL;
   2519 
   2520 if (raidautoconfig) {
   2521 
   2522 	/* we begin by trolling through *all* the devices on the system */
   2523 
   2524 	for (dv = alldevs.tqh_first; dv != NULL;
   2525 	     dv = dv->dv_list.tqe_next) {
   2526 
   2527 		/* we are only interested in disks... */
   2528 		if (dv->dv_class != DV_DISK)
   2529 			continue;
   2530 
   2531 		/* we don't care about floppies... */
   2532 		if (!strcmp(dv->dv_cfdata->cf_driver->cd_name,"fd")) {
   2533 			continue;
   2534 		}
   2535 
   2536 		/* need to find the device_name_to_block_device_major stuff */
   2537 		cd_name = dv->dv_cfdata->cf_driver->cd_name;
   2538 		dtobdm = dev_name2blk;
   2539 		while (dtobdm->d_name && strcmp(dtobdm->d_name, cd_name)) {
   2540 			dtobdm++;
   2541 		}
   2542 
   2543 		/* get a vnode for the raw partition of this disk */
   2544 
   2545 		dev = MAKEDISKDEV(dtobdm->d_maj, dv->dv_unit, RAW_PART);
   2546 		if (bdevvp(dev, &vp))
   2547 			panic("RAID can't alloc vnode");
   2548 
   2549 		error = VOP_OPEN(vp, FREAD, NOCRED, 0);
   2550 
   2551 		if (error) {
   2552 			/* "Who cares."  Continue looking
   2553 			   for something that exists*/
   2554 			vput(vp);
   2555 			continue;
   2556 		}
   2557 
   2558 		/* Ok, the disk exists.  Go get the disklabel. */
   2559 		error = VOP_IOCTL(vp, DIOCGDINFO, (caddr_t)&label,
   2560 				  FREAD, NOCRED, 0);
   2561 		if (error) {
   2562 			/*
   2563 			 * XXX can't happen - open() would
   2564 			 * have errored out (or faked up one)
   2565 			 */
   2566 			printf("can't get label for dev %s%c (%d)!?!?\n",
   2567 			       dv->dv_xname, 'a' + RAW_PART, error);
   2568 		}
   2569 
   2570 		/* don't need this any more.  We'll allocate it again
   2571 		   a little later if we really do... */
   2572 		VOP_CLOSE(vp, FREAD, NOCRED, 0);
   2573 		vput(vp);
   2574 
   2575 		for (i=0; i < label.d_npartitions; i++) {
   2576 			/* We only support partitions marked as RAID */
   2577 			if (label.d_partitions[i].p_fstype != FS_RAID)
   2578 				continue;
   2579 
   2580 			dev = MAKEDISKDEV(dtobdm->d_maj, dv->dv_unit, i);
   2581 			if (bdevvp(dev, &vp))
   2582 				panic("RAID can't alloc vnode");
   2583 
   2584 			error = VOP_OPEN(vp, FREAD, NOCRED, 0);
   2585 			if (error) {
   2586 				/* Whatever... */
   2587 				vput(vp);
   2588 				continue;
   2589 			}
   2590 
   2591 			good_one = 0;
   2592 
   2593 			clabel = (RF_ComponentLabel_t *)
   2594 				malloc(sizeof(RF_ComponentLabel_t),
   2595 				       M_RAIDFRAME, M_NOWAIT);
   2596 			if (clabel == NULL) {
   2597 				/* XXX CLEANUP HERE */
   2598 				printf("RAID auto config: out of memory!\n");
   2599 				return(NULL); /* XXX probably should panic? */
   2600 			}
   2601 
   2602 			if (!raidread_component_label(dev, vp, clabel)) {
   2603 				/* Got the label.  Does it look reasonable? */
   2604 				if (rf_reasonable_label(clabel) &&
   2605 				    (clabel->partitionSize <=
   2606 				     label.d_partitions[i].p_size)) {
   2607 #if DEBUG
   2608 					printf("Component on: %s%c: %d\n",
   2609 					       dv->dv_xname, 'a'+i,
   2610 					       label.d_partitions[i].p_size);
   2611 					print_component_label(clabel);
   2612 #endif
   2613 					/* if it's reasonable, add it,
   2614 					   else ignore it. */
   2615 					ac = (RF_AutoConfig_t *)
   2616 						malloc(sizeof(RF_AutoConfig_t),
   2617 						       M_RAIDFRAME,
   2618 						       M_NOWAIT);
   2619 					if (ac == NULL) {
   2620 						/* XXX should panic?? */
   2621 						return(NULL);
   2622 					}
   2623 
   2624 					sprintf(ac->devname, "%s%c",
   2625 						dv->dv_xname, 'a'+i);
   2626 					ac->dev = dev;
   2627 					ac->vp = vp;
   2628 					ac->clabel = clabel;
   2629 					ac->next = ac_list;
   2630 					ac_list = ac;
   2631 					good_one = 1;
   2632 				}
   2633 			}
   2634 			if (!good_one) {
   2635 				/* cleanup */
   2636 				free(clabel, M_RAIDFRAME);
   2637 				VOP_CLOSE(vp, FREAD, NOCRED, 0);
   2638 				vput(vp);
   2639 			}
   2640 		}
   2641 	}
   2642 }
   2643 return(ac_list);
   2644 }
   2645 
   2646 static int
   2647 rf_reasonable_label(clabel)
   2648 	RF_ComponentLabel_t *clabel;
   2649 {
   2650 
   2651 	if (((clabel->version==RF_COMPONENT_LABEL_VERSION_1) ||
   2652 	     (clabel->version==RF_COMPONENT_LABEL_VERSION)) &&
   2653 	    ((clabel->clean == RF_RAID_CLEAN) ||
   2654 	     (clabel->clean == RF_RAID_DIRTY)) &&
   2655 	    clabel->row >=0 &&
   2656 	    clabel->column >= 0 &&
   2657 	    clabel->num_rows > 0 &&
   2658 	    clabel->num_columns > 0 &&
   2659 	    clabel->row < clabel->num_rows &&
   2660 	    clabel->column < clabel->num_columns &&
   2661 	    clabel->blockSize > 0 &&
   2662 	    clabel->numBlocks > 0) {
   2663 		/* label looks reasonable enough... */
   2664 		return(1);
   2665 	}
   2666 	return(0);
   2667 }
   2668 
   2669 
   2670 void
   2671 print_component_label(clabel)
   2672 	RF_ComponentLabel_t *clabel;
   2673 {
   2674 	printf("   Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
   2675 	       clabel->row, clabel->column,
   2676 	       clabel->num_rows, clabel->num_columns);
   2677 	printf("   Version: %d Serial Number: %d Mod Counter: %d\n",
   2678 	       clabel->version, clabel->serial_number,
   2679 	       clabel->mod_counter);
   2680 	printf("   Clean: %s Status: %d\n",
   2681 	       clabel->clean ? "Yes" : "No", clabel->status );
   2682 	printf("   sectPerSU: %d SUsPerPU: %d SUsPerRU: %d\n",
   2683 	       clabel->sectPerSU, clabel->SUsPerPU, clabel->SUsPerRU);
   2684 	printf("   RAID Level: %c  blocksize: %d numBlocks: %d\n",
   2685 	       (char) clabel->parityConfig, clabel->blockSize,
   2686 	       clabel->numBlocks);
   2687 	printf("   Autoconfig: %s\n", clabel->autoconfigure ? "Yes" : "No" );
   2688 	printf("   Last configured as: raid%d\n", clabel->last_unit );
   2689 #if 0
   2690 	   printf("   Config order: %d\n", clabel->config_order);
   2691 #endif
   2692 
   2693 }
   2694 
   2695 RF_ConfigSet_t *
   2696 rf_create_auto_sets(ac_list)
   2697 	RF_AutoConfig_t *ac_list;
   2698 {
   2699 	RF_AutoConfig_t *ac;
   2700 	RF_ConfigSet_t *config_sets;
   2701 	RF_ConfigSet_t *cset;
   2702 	RF_AutoConfig_t *ac_next;
   2703 
   2704 
   2705 	config_sets = NULL;
   2706 
   2707 	/* Go through the AutoConfig list, and figure out which components
   2708 	   belong to what sets.  */
   2709 	ac = ac_list;
   2710 	while(ac!=NULL) {
   2711 		/* we're going to putz with ac->next, so save it here
   2712 		   for use at the end of the loop */
   2713 		ac_next = ac->next;
   2714 
   2715 		if (config_sets == NULL) {
   2716 			/* will need at least this one... */
   2717 			config_sets = (RF_ConfigSet_t *)
   2718 				malloc(sizeof(RF_ConfigSet_t),
   2719 				       M_RAIDFRAME, M_NOWAIT);
   2720 			if (config_sets == NULL) {
   2721 				panic("rf_create_auto_sets: No memory!\n");
   2722 			}
   2723 			/* this one is easy :) */
   2724 			config_sets->ac = ac;
   2725 			config_sets->next = NULL;
   2726 			config_sets->rootable = 0;
   2727 			ac->next = NULL;
   2728 		} else {
   2729 			/* which set does this component fit into? */
   2730 			cset = config_sets;
   2731 			while(cset!=NULL) {
   2732 				if (rf_does_it_fit(cset, ac)) {
   2733 					/* looks like it matches */
   2734 					ac->next = cset->ac;
   2735 					cset->ac = ac;
   2736 					break;
   2737 				}
   2738 				cset = cset->next;
   2739 			}
   2740 			if (cset==NULL) {
   2741 				/* didn't find a match above... new set..*/
   2742 				cset = (RF_ConfigSet_t *)
   2743 					malloc(sizeof(RF_ConfigSet_t),
   2744 					       M_RAIDFRAME, M_NOWAIT);
   2745 				if (cset == NULL) {
   2746 					panic("rf_create_auto_sets: No memory!\n");
   2747 				}
   2748 				cset->ac = ac;
   2749 				ac->next = NULL;
   2750 				cset->next = config_sets;
   2751 				cset->rootable = 0;
   2752 				config_sets = cset;
   2753 			}
   2754 		}
   2755 		ac = ac_next;
   2756 	}
   2757 
   2758 
   2759 	return(config_sets);
   2760 }
   2761 
   2762 static int
   2763 rf_does_it_fit(cset, ac)
   2764 	RF_ConfigSet_t *cset;
   2765 	RF_AutoConfig_t *ac;
   2766 {
   2767 	RF_ComponentLabel_t *clabel1, *clabel2;
   2768 
   2769 	/* If this one matches the *first* one in the set, that's good
   2770 	   enough, since the other members of the set would have been
   2771 	   through here too... */
   2772 	/* note that we are not checking partitionSize here.. */
   2773 
   2774 	clabel1 = cset->ac->clabel;
   2775 	clabel2 = ac->clabel;
   2776 	if ((clabel1->version == clabel2->version) &&
   2777 	    (clabel1->serial_number == clabel2->serial_number) &&
   2778 	    (clabel1->mod_counter == clabel2->mod_counter) &&
   2779 	    (clabel1->num_rows == clabel2->num_rows) &&
   2780 	    (clabel1->num_columns == clabel2->num_columns) &&
   2781 	    (clabel1->sectPerSU == clabel2->sectPerSU) &&
   2782 	    (clabel1->SUsPerPU == clabel2->SUsPerPU) &&
   2783 	    (clabel1->SUsPerRU == clabel2->SUsPerRU) &&
   2784 	    (clabel1->parityConfig == clabel2->parityConfig) &&
   2785 	    (clabel1->maxOutstanding == clabel2->maxOutstanding) &&
   2786 	    (clabel1->blockSize == clabel2->blockSize) &&
   2787 	    (clabel1->numBlocks == clabel2->numBlocks) &&
   2788 	    (clabel1->autoconfigure == clabel2->autoconfigure) &&
   2789 	    (clabel1->root_partition == clabel2->root_partition) &&
   2790 	    (clabel1->last_unit == clabel2->last_unit) &&
   2791 	    (clabel1->config_order == clabel2->config_order)) {
   2792 		/* if it get's here, it almost *has* to be a match */
   2793 	} else {
   2794 		/* it's not consistent with somebody in the set..
   2795 		   punt */
   2796 		return(0);
   2797 	}
   2798 	/* all was fine.. it must fit... */
   2799 	return(1);
   2800 }
   2801 
   2802 int
   2803 rf_have_enough_components(cset)
   2804 	RF_ConfigSet_t *cset;
   2805 {
   2806 	RF_AutoConfig_t *ac;
   2807 	RF_AutoConfig_t *auto_config;
   2808 	RF_ComponentLabel_t *clabel;
   2809 	int r,c;
   2810 	int num_rows;
   2811 	int num_cols;
   2812 	int num_missing;
   2813 
   2814 	/* check to see that we have enough 'live' components
   2815 	   of this set.  If so, we can configure it if necessary */
   2816 
   2817 	num_rows = cset->ac->clabel->num_rows;
   2818 	num_cols = cset->ac->clabel->num_columns;
   2819 
   2820 	/* XXX Check for duplicate components!?!?!? */
   2821 
   2822 	num_missing = 0;
   2823 	auto_config = cset->ac;
   2824 
   2825 	for(r=0; r<num_rows; r++) {
   2826 		for(c=0; c<num_cols; c++) {
   2827 			ac = auto_config;
   2828 			while(ac!=NULL) {
   2829 				if (ac->clabel==NULL) {
   2830 					/* big-time bad news. */
   2831 					goto fail;
   2832 				}
   2833 				if ((ac->clabel->row == r) &&
   2834 				    (ac->clabel->column == c)) {
   2835 					/* it's this one... */
   2836 #if DEBUG
   2837 					printf("Found: %s at %d,%d\n",
   2838 					       ac->devname,r,c);
   2839 #endif
   2840 					break;
   2841 				}
   2842 				ac=ac->next;
   2843 			}
   2844 			if (ac==NULL) {
   2845 				/* Didn't find one here! */
   2846 				num_missing++;
   2847 			}
   2848 		}
   2849 	}
   2850 
   2851 	clabel = cset->ac->clabel;
   2852 
   2853 	if (((clabel->parityConfig == '0') && (num_missing > 0)) ||
   2854 	    ((clabel->parityConfig == '1') && (num_missing > 1)) ||
   2855 	    ((clabel->parityConfig == '4') && (num_missing > 1)) ||
   2856 	    ((clabel->parityConfig == '5') && (num_missing > 1))) {
   2857 		/* XXX this needs to be made *much* more general */
   2858 		/* Too many failures */
   2859 		return(0);
   2860 	}
   2861 	/* otherwise, all is well, and we've got enough to take a kick
   2862 	   at autoconfiguring this set */
   2863 	return(1);
   2864 fail:
   2865 	return(0);
   2866 
   2867 }
   2868 
   2869 void
   2870 rf_create_configuration(ac,config,raidPtr)
   2871 	RF_AutoConfig_t *ac;
   2872 	RF_Config_t *config;
   2873 	RF_Raid_t *raidPtr;
   2874 {
   2875 	RF_ComponentLabel_t *clabel;
   2876 
   2877 	clabel = ac->clabel;
   2878 
   2879 	/* 1. Fill in the common stuff */
   2880 	config->numRow = clabel->num_rows;
   2881 	config->numCol = clabel->num_columns;
   2882 	config->numSpare = 0; /* XXX should this be set here? */
   2883 	config->sectPerSU = clabel->sectPerSU;
   2884 	config->SUsPerPU = clabel->SUsPerPU;
   2885 	config->SUsPerRU = clabel->SUsPerRU;
   2886 	config->parityConfig = clabel->parityConfig;
   2887 	/* XXX... */
   2888 	strcpy(config->diskQueueType,"fifo");
   2889 	config->maxOutstandingDiskReqs = clabel->maxOutstanding;
   2890 	config->layoutSpecificSize = 0; /* XXX ?? */
   2891 
   2892 	while(ac!=NULL) {
   2893 		/* row/col values will be in range due to the checks
   2894 		   in reasonable_label() */
   2895 		strcpy(config->devnames[ac->clabel->row][ac->clabel->column],
   2896 		       ac->devname);
   2897 		ac = ac->next;
   2898 	}
   2899 
   2900 }
   2901 
   2902 int
   2903 rf_set_autoconfig(raidPtr, new_value)
   2904 	RF_Raid_t *raidPtr;
   2905 	int new_value;
   2906 {
   2907 	RF_ComponentLabel_t clabel;
   2908 	struct vnode *vp;
   2909 	dev_t dev;
   2910 	int row, column;
   2911 
   2912 	raidPtr->autoconfigure = new_value;
   2913 	for(row=0; row<raidPtr->numRow; row++) {
   2914 		for(column=0; column<raidPtr->numCol; column++) {
   2915 			dev = raidPtr->Disks[row][column].dev;
   2916 			vp = raidPtr->raid_cinfo[row][column].ci_vp;
   2917 			raidread_component_label(dev, vp, &clabel);
   2918 			clabel.autoconfigure = new_value;
   2919 			raidwrite_component_label(dev, vp, &clabel);
   2920 		}
   2921 	}
   2922 	return(new_value);
   2923 }
   2924 
   2925 int
   2926 rf_set_rootpartition(raidPtr, new_value)
   2927 	RF_Raid_t *raidPtr;
   2928 	int new_value;
   2929 {
   2930 	RF_ComponentLabel_t clabel;
   2931 	struct vnode *vp;
   2932 	dev_t dev;
   2933 	int row, column;
   2934 
   2935 	raidPtr->root_partition = new_value;
   2936 	for(row=0; row<raidPtr->numRow; row++) {
   2937 		for(column=0; column<raidPtr->numCol; column++) {
   2938 			dev = raidPtr->Disks[row][column].dev;
   2939 			vp = raidPtr->raid_cinfo[row][column].ci_vp;
   2940 			raidread_component_label(dev, vp, &clabel);
   2941 			clabel.root_partition = new_value;
   2942 			raidwrite_component_label(dev, vp, &clabel);
   2943 		}
   2944 	}
   2945 	return(new_value);
   2946 }
   2947 
   2948 void
   2949 rf_release_all_vps(cset)
   2950 	RF_ConfigSet_t *cset;
   2951 {
   2952 	RF_AutoConfig_t *ac;
   2953 
   2954 	ac = cset->ac;
   2955 	while(ac!=NULL) {
   2956 		/* Close the vp, and give it back */
   2957 		if (ac->vp) {
   2958 			VOP_CLOSE(ac->vp, FREAD, NOCRED, 0);
   2959 			vput(ac->vp);
   2960 		}
   2961 		ac = ac->next;
   2962 	}
   2963 }
   2964 
   2965 
   2966 void
   2967 rf_cleanup_config_set(cset)
   2968 	RF_ConfigSet_t *cset;
   2969 {
   2970 	RF_AutoConfig_t *ac;
   2971 	RF_AutoConfig_t *next_ac;
   2972 
   2973 	ac = cset->ac;
   2974 	while(ac!=NULL) {
   2975 		next_ac = ac->next;
   2976 		/* nuke the label */
   2977 		free(ac->clabel, M_RAIDFRAME);
   2978 		/* cleanup the config structure */
   2979 		free(ac, M_RAIDFRAME);
   2980 		/* "next.." */
   2981 		ac = next_ac;
   2982 	}
   2983 	/* and, finally, nuke the config set */
   2984 	free(cset, M_RAIDFRAME);
   2985 }
   2986 
   2987 
   2988 void
   2989 raid_init_component_label(raidPtr, clabel)
   2990 	RF_Raid_t *raidPtr;
   2991 	RF_ComponentLabel_t *clabel;
   2992 {
   2993 	/* current version number */
   2994 	clabel->version = RF_COMPONENT_LABEL_VERSION;
   2995 	clabel->serial_number = raidPtr->serial_number;
   2996 	clabel->mod_counter = raidPtr->mod_counter;
   2997 	clabel->num_rows = raidPtr->numRow;
   2998 	clabel->num_columns = raidPtr->numCol;
   2999 	clabel->clean = RF_RAID_DIRTY; /* not clean */
   3000 	clabel->status = rf_ds_optimal; /* "It's good!" */
   3001 
   3002 	clabel->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit;
   3003 	clabel->SUsPerPU = raidPtr->Layout.SUsPerPU;
   3004 	clabel->SUsPerRU = raidPtr->Layout.SUsPerRU;
   3005 
   3006 	clabel->blockSize = raidPtr->bytesPerSector;
   3007 	clabel->numBlocks = raidPtr->sectorsPerDisk;
   3008 
   3009 	/* XXX not portable */
   3010 	clabel->parityConfig = raidPtr->Layout.map->parityConfig;
   3011 	clabel->maxOutstanding = raidPtr->maxOutstanding;
   3012 	clabel->autoconfigure = raidPtr->autoconfigure;
   3013 	clabel->root_partition = raidPtr->root_partition;
   3014 	clabel->last_unit = raidPtr->raidid;
   3015 	clabel->config_order = raidPtr->config_order;
   3016 }
   3017 
   3018 int
   3019 rf_auto_config_set(cset,unit)
   3020 	RF_ConfigSet_t *cset;
   3021 	int *unit;
   3022 {
   3023 	RF_Raid_t *raidPtr;
   3024 	RF_Config_t *config;
   3025 	int raidID;
   3026 	int retcode;
   3027 
   3028 	printf("Starting autoconfigure on raid%d\n",raidID);
   3029 
   3030 	retcode = 0;
   3031 	*unit = -1;
   3032 
   3033 	/* 1. Create a config structure */
   3034 
   3035 	config = (RF_Config_t *)malloc(sizeof(RF_Config_t),
   3036 				       M_RAIDFRAME,
   3037 				       M_NOWAIT);
   3038 	if (config==NULL) {
   3039 		printf("Out of mem!?!?\n");
   3040 				/* XXX do something more intelligent here. */
   3041 		return(1);
   3042 	}
   3043 	/* XXX raidID needs to be set correctly.. */
   3044 
   3045 	/*
   3046 	   2. Figure out what RAID ID this one is supposed to live at
   3047 	   See if we can get the same RAID dev that it was configured
   3048 	   on last time..
   3049 	*/
   3050 
   3051 	raidID = cset->ac->clabel->last_unit;
   3052 	if ((raidID < 0) || (raidID >= numraid)) {
   3053 		/* let's not wander off into lala land. */
   3054 		raidID = numraid - 1;
   3055 	}
   3056 	if (raidPtrs[raidID]->valid != 0) {
   3057 
   3058 		/*
   3059 		   Nope... Go looking for an alternative...
   3060 		   Start high so we don't immediately use raid0 if that's
   3061 		   not taken.
   3062 		*/
   3063 
   3064 		for(raidID = numraid; raidID >= 0; raidID--) {
   3065 			if (raidPtrs[raidID]->valid == 0) {
   3066 				/* can use this one! */
   3067 				break;
   3068 			}
   3069 		}
   3070 	}
   3071 
   3072 	if (raidID < 0) {
   3073 		/* punt... */
   3074 		printf("Unable to auto configure this set!\n");
   3075 		printf("(Out of RAID devs!)\n");
   3076 		return(1);
   3077 	}
   3078 
   3079 	raidPtr = raidPtrs[raidID];
   3080 
   3081 	/* XXX all this stuff should be done SOMEWHERE ELSE! */
   3082 	raidPtr->raidid = raidID;
   3083 	raidPtr->openings = RAIDOUTSTANDING;
   3084 
   3085 	/* 3. Build the configuration structure */
   3086 	rf_create_configuration(cset->ac, config, raidPtr);
   3087 
   3088 	/* 4. Do the configuration */
   3089 	retcode = rf_Configure(raidPtr, config, cset->ac);
   3090 
   3091 	if (retcode == 0) {
   3092 #if DEBUG
   3093 		printf("Calling raidinit()\n");
   3094 #endif
   3095 				/* XXX the 0 below is bogus! */
   3096 		raidinit(raidPtrs[raidID]);
   3097 
   3098 		rf_markalldirty(raidPtrs[raidID]);
   3099 		raidPtrs[raidID]->autoconfigure = 1; /* XXX do this here? */
   3100 		if (cset->ac->clabel->root_partition==1) {
   3101 			/* everything configured just fine.  Make a note
   3102 			   that this set is eligible to be root. */
   3103 			cset->rootable = 1;
   3104 			/* XXX do this here? */
   3105 			raidPtrs[raidID]->root_partition = 1;
   3106 		}
   3107 	}
   3108 
   3109 	/* 5. Cleanup */
   3110 	free(config, M_RAIDFRAME);
   3111 
   3112 	*unit = raidID;
   3113 	return(retcode);
   3114 }
   3115