Home | History | Annotate | Line # | Download | only in raidframe
rf_netbsdkintf.c revision 1.58
      1 /*	$NetBSD: rf_netbsdkintf.c,v 1.58 2000/02/25 02:21:12 oster Exp $	*/
      2 /*-
      3  * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc.
      4  * All rights reserved.
      5  *
      6  * This code is derived from software contributed to The NetBSD Foundation
      7  * by Greg Oster; Jason R. Thorpe.
      8  *
      9  * Redistribution and use in source and binary forms, with or without
     10  * modification, are permitted provided that the following conditions
     11  * are met:
     12  * 1. Redistributions of source code must retain the above copyright
     13  *    notice, this list of conditions and the following disclaimer.
     14  * 2. Redistributions in binary form must reproduce the above copyright
     15  *    notice, this list of conditions and the following disclaimer in the
     16  *    documentation and/or other materials provided with the distribution.
     17  * 3. All advertising materials mentioning features or use of this software
     18  *    must display the following acknowledgement:
     19  *        This product includes software developed by the NetBSD
     20  *        Foundation, Inc. and its contributors.
     21  * 4. Neither the name of The NetBSD Foundation nor the names of its
     22  *    contributors may be used to endorse or promote products derived
     23  *    from this software without specific prior written permission.
     24  *
     25  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     26  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     27  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     28  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     29  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     30  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     31  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     32  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     33  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     34  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     35  * POSSIBILITY OF SUCH DAMAGE.
     36  */
     37 
     38 /*
     39  * Copyright (c) 1988 University of Utah.
     40  * Copyright (c) 1990, 1993
     41  *      The Regents of the University of California.  All rights reserved.
     42  *
     43  * This code is derived from software contributed to Berkeley by
     44  * the Systems Programming Group of the University of Utah Computer
     45  * Science Department.
     46  *
     47  * Redistribution and use in source and binary forms, with or without
     48  * modification, are permitted provided that the following conditions
     49  * are met:
     50  * 1. Redistributions of source code must retain the above copyright
     51  *    notice, this list of conditions and the following disclaimer.
     52  * 2. Redistributions in binary form must reproduce the above copyright
     53  *    notice, this list of conditions and the following disclaimer in the
     54  *    documentation and/or other materials provided with the distribution.
     55  * 3. All advertising materials mentioning features or use of this software
     56  *    must display the following acknowledgement:
     57  *      This product includes software developed by the University of
     58  *      California, Berkeley and its contributors.
     59  * 4. Neither the name of the University nor the names of its contributors
     60  *    may be used to endorse or promote products derived from this software
     61  *    without specific prior written permission.
     62  *
     63  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     64  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     65  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     66  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     67  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     68  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     69  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     70  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     71  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     72  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     73  * SUCH DAMAGE.
     74  *
     75  * from: Utah $Hdr: cd.c 1.6 90/11/28$
     76  *
     77  *      @(#)cd.c        8.2 (Berkeley) 11/16/93
     78  */
     79 
     80 
     81 
     82 
     83 /*
     84  * Copyright (c) 1995 Carnegie-Mellon University.
     85  * All rights reserved.
     86  *
     87  * Authors: Mark Holland, Jim Zelenka
     88  *
     89  * Permission to use, copy, modify and distribute this software and
     90  * its documentation is hereby granted, provided that both the copyright
     91  * notice and this permission notice appear in all copies of the
     92  * software, derivative works or modified versions, and any portions
     93  * thereof, and that both notices appear in supporting documentation.
     94  *
     95  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
     96  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
     97  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
     98  *
     99  * Carnegie Mellon requests users of this software to return to
    100  *
    101  *  Software Distribution Coordinator  or  Software.Distribution (at) CS.CMU.EDU
    102  *  School of Computer Science
    103  *  Carnegie Mellon University
    104  *  Pittsburgh PA 15213-3890
    105  *
    106  * any improvements or extensions that they make and grant Carnegie the
    107  * rights to redistribute these changes.
    108  */
    109 
    110 /***********************************************************
    111  *
    112  * rf_kintf.c -- the kernel interface routines for RAIDframe
    113  *
    114  ***********************************************************/
    115 
    116 #include <sys/errno.h>
    117 #include <sys/param.h>
    118 #include <sys/pool.h>
    119 #include <sys/queue.h>
    120 #include <sys/disk.h>
    121 #include <sys/device.h>
    122 #include <sys/stat.h>
    123 #include <sys/ioctl.h>
    124 #include <sys/fcntl.h>
    125 #include <sys/systm.h>
    126 #include <sys/namei.h>
    127 #include <sys/vnode.h>
    128 #include <sys/param.h>
    129 #include <sys/types.h>
    130 #include <machine/types.h>
    131 #include <sys/disklabel.h>
    132 #include <sys/conf.h>
    133 #include <sys/lock.h>
    134 #include <sys/buf.h>
    135 #include <sys/user.h>
    136 
    137 #include "raid.h"
    138 #include "rf_raid.h"
    139 #include "rf_raidframe.h"
    140 #include "rf_copyback.h"
    141 #include "rf_dag.h"
    142 #include "rf_dagflags.h"
    143 #include "rf_diskqueue.h"
    144 #include "rf_acctrace.h"
    145 #include "rf_etimer.h"
    146 #include "rf_general.h"
    147 #include "rf_debugMem.h"
    148 #include "rf_kintf.h"
    149 #include "rf_options.h"
    150 #include "rf_driver.h"
    151 #include "rf_parityscan.h"
    152 #include "rf_debugprint.h"
    153 #include "rf_threadstuff.h"
    154 
/*
 * Debug verbosity for this file.  db1_printf() output appears only when
 * this is greater than zero and the kernel was built with DEBUG.
 */
int     rf_kdebug_level = 0;

/*
 * db1_printf((fmt, ...)) -- level-1 debug printf.
 *
 * The single macro argument is a complete, parenthesized printf() argument
 * list.  Both variants expand to exactly one statement (do { } while (0)),
 * so the macro can be used as the body of an unbraced if/else: a bare
 * `if (...) printf ...' would capture a following `else', and `{ }'
 * followed by the caller's `;' would detach it.
 * Without DEBUG the arguments are not evaluated.
 */
#ifdef DEBUG
#define db1_printf(a) do { if (rf_kdebug_level > 0) printf a; } while (0)
#else				/* DEBUG */
#define db1_printf(a) do { } while (0)
#endif				/* DEBUG */
    162 
/*
 * Table of per-unit RAID descriptor pointers, indexed by unit number.
 * The table and the descriptors are allocated in raidattach().
 */
static RF_Raid_t **raidPtrs;	/* global raid device descriptors */

/*
 * Initialized with rf_mutex_init() in raidattach().
 * NOTE(review): presumably protects the two spare-table queues below --
 * confirm against the code that uses them.
 */
RF_DECLARE_STATIC_MUTEX(rf_sparet_wait_mutex)

static RF_SparetWait_t *rf_sparet_wait_queue;	/* requests to install a
						 * spare table */
static RF_SparetWait_t *rf_sparet_resp_queue;	/* responses from
						 * installation process */

/* prototypes */
/* Helpers private to this file. */
static void KernelWakeupFunc(struct buf * bp);
static void InitBP(struct buf * bp, struct vnode *, unsigned rw_flag,
		   dev_t dev, RF_SectorNum_t startSect,
		   RF_SectorCount_t numSect, caddr_t buf,
		   void (*cbFunc) (struct buf *), void *cbArg,
		   int logBytesPerSector, struct proc * b_proc);
static int raidinit __P((dev_t, RF_Raid_t *, int));

/* Attach routine and driver entry points defined in this file. */
void raidattach __P((int));
int raidsize __P((dev_t));
int raidopen __P((dev_t, int, int, struct proc *));
int raidclose __P((dev_t, int, int, struct proc *));
int raidioctl __P((dev_t, u_long, caddr_t, int, struct proc *));
int raidwrite __P((dev_t, struct uio *, int));
int raidread __P((dev_t, struct uio *, int));
void raidstrategy __P((struct buf *));
int raiddump __P((dev_t, daddr_t, caddr_t, size_t));
    190 
/*
 * Pilfered from ccd.c
 */

/*
 * Wrapper around the struct buf used for an I/O, carrying a pointer back
 * to the original buf and to the RAIDframe disk-queue request it belongs to.
 * NOTE(review): rf_buf presumably has to come first so that a pointer to
 * it can be converted back to the enclosing struct raidbuf -- confirm
 * against the completion path.
 */
struct raidbuf {
	struct buf rf_buf;	/* new I/O buf.  MUST BE FIRST!!! */
	struct buf *rf_obp;	/* ptr. to original I/O buf */
	int     rf_flags;	/* misc. flags */
	RF_DiskQueueData_t *req;/* the request that this was part of.. */
};


/*
 * Take a buffer from, or return one to, a unit's sc_cbufpool.
 * RAIDGETBUF passes PR_NOWAIT to pool_get().
 * NOTE(review): presumably the result can therefore be NULL -- confirm
 * that callers check it.
 */
#define RAIDGETBUF(rs) pool_get(&(rs)->sc_cbufpool, PR_NOWAIT)
#define	RAIDPUTBUF(rs, cbp) pool_put(&(rs)->sc_cbufpool, cbp)
    205 
/* XXX Not sure if the following should be replacing the raidPtrs above,
   or if it should be used in conjunction with that...
   Note: Don't use sc_dev until the raidinit(0,_,_) call in
   rf_auto_config_set() actually passes in a real dev_t!  */

/*
 * Per-unit driver state.  One entry per unit lives in the raid_softc[]
 * array allocated by raidattach(); the entry points below index it by
 * raidunit(dev).
 */
struct raid_softc {
	int     sc_flags;	/* flags */
	int     sc_cflags;	/* configuration flags */
	size_t  sc_size;        /* size of the raid device */
	dev_t   sc_dev;	        /* our device.. */
	char    sc_xname[20];	/* XXX external name */
	struct disk sc_dkdev;	/* generic disk device info */
	struct pool sc_cbufpool;	/* component buffer pool */
	struct buf_queue buf_queue;	/* used for the device queue */
};
/* sc_flags */
#define RAIDF_INITED	0x01	/* unit has been initialized */
#define RAIDF_WLABEL	0x02	/* label area is writable */
#define RAIDF_LABELLING	0x04	/* unit is currently being labelled */
#define RAIDF_WANTED	0x40	/* someone is waiting to obtain a lock */
#define RAIDF_LOCKED	0x80	/* unit is locked */

/* Unit number encoded in a dev_t. */
#define	raidunit(x)	DISKUNIT(x)
/*
 * Number of usable units.  Set by raidattach(); the entry points reject
 * any unit >= numraid before indexing raid_softc[] or raidPtrs[].
 */
int numraid = 0;
    230 
/*
 * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
 * Be aware that large numbers can allow the driver to consume a lot of
 * kernel memory, especially on writes, and in degraded mode reads.
 *
 * For example: with a stripe width of 64 blocks (32k) and 5 disks,
 * a single 64K write will typically require 64K for the old data,
 * 64K for the old parity, and 64K for the new parity, for a total
 * of 192K (if the parity buffer is not re-used immediately).
 * Even if it is used immediately, that's still 128K, which when multiplied
 * by say 10 requests, is 1280K, *on top* of the 640K of incoming data.
 *
 * Now in degraded mode, for example, a 64K read on the above setup may
 * require data reconstruction, which will require *all* of the 4 remaining
 * disks to participate -- 4 * 32K/disk == 128K again.
 */
    247 
/*
 * Default for the number of simultaneous I/Os per RAID device; only
 * defined here if the build has not already supplied a value.
 */
#ifndef RAIDOUTSTANDING
#define RAIDOUTSTANDING   6
#endif

/* dev_t of the raw partition (RAW_PART) on the same major/unit as `dev'. */
#define RAIDLABELDEV(dev)	\
	(MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))
    254 
/* declared here, and made public, for the benefit of KVM stuff.. */
struct raid_softc *raid_softc;

/* Disklabel helpers. */
static void raidgetdefaultlabel __P((RF_Raid_t *, struct raid_softc *,
				     struct disklabel *));
static void raidgetdisklabel __P((dev_t));
static void raidmakedisklabel __P((struct raid_softc *));

/* Per-unit lock; raidopen() and raidclose() hold it while they run. */
static int raidlock __P((struct raid_softc *));
static void raidunlock __P((struct raid_softc *));

static void rf_markalldirty __P((RF_Raid_t *));
void rf_mountroot_hook __P((struct device *));

/*
 * raidrootdev is an array with one struct device per unit, allocated and
 * filled in by raidattach(); rf_buildroothack() may point booted_device
 * at one of its entries.
 */
struct device *raidrootdev;
struct cfdata cf_raidrootdev;
struct cfdriver cfdrv;
/* XXX these should be moved up */
#include "rf_configure.h"
#include <sys/reboot.h>

void rf_ReconThread __P((struct rf_recon_req *));
/* XXX what I want is: */
/*void rf_ReconThread __P((RF_Raid_t *raidPtr));  */
void rf_RewriteParityThread __P((RF_Raid_t *raidPtr));
void rf_CopybackThread __P((RF_Raid_t *raidPtr));
void rf_ReconstructInPlaceThread __P((struct rf_recon_req *));
void rf_buildroothack __P((void *));

/* Autoconfiguration support. */
RF_AutoConfig_t *rf_find_raid_components __P((void));
void print_component_label __P((RF_ComponentLabel_t *));
RF_ConfigSet_t *rf_create_auto_sets __P((RF_AutoConfig_t *));
static int rf_does_it_fit __P((RF_ConfigSet_t *,RF_AutoConfig_t *));
static int rf_reasonable_label __P((RF_ComponentLabel_t *));
void rf_create_configuration __P((RF_AutoConfig_t *,RF_Config_t *,
				  RF_Raid_t *));
int rf_set_autoconfig __P((RF_Raid_t *, int));
int rf_set_rootpartition __P((RF_Raid_t *, int));
void rf_release_all_vps __P((RF_ConfigSet_t *));
void rf_cleanup_config_set __P((RF_ConfigSet_t *));
int rf_have_enough_components __P((RF_ConfigSet_t *));
int rf_auto_config_set __P((RF_ConfigSet_t *, int *));

/*
 * Checked once in raidattach(): when zero (the value here) the component
 * search and rf_buildroothack() scheduling are skipped entirely.
 */
static int raidautoconfig = 0; /* Debugging, mostly.  Set to 0 to not
				  allow autoconfig to take place */
/* XXX ugly hack. */
const char *raid_rooty = "raid0";
extern struct device *booted_device;
    303 
    304 void
    305 raidattach(num)
    306 	int     num;
    307 {
    308 	int raidID;
    309 	int i, rc;
    310 	RF_AutoConfig_t *ac_list; /* autoconfig list */
    311 	RF_ConfigSet_t *config_sets;
    312 
    313 #ifdef DEBUG
    314 	printf("raidattach: Asked for %d units\n", num);
    315 #endif
    316 
    317 	if (num <= 0) {
    318 #ifdef DIAGNOSTIC
    319 		panic("raidattach: count <= 0");
    320 #endif
    321 		return;
    322 	}
    323 	/* This is where all the initialization stuff gets done. */
    324 
    325 	numraid = num;
    326 
    327 	/* Make some space for requested number of units... */
    328 
    329 	RF_Calloc(raidPtrs, num, sizeof(RF_Raid_t *), (RF_Raid_t **));
    330 	if (raidPtrs == NULL) {
    331 		panic("raidPtrs is NULL!!\n");
    332 	}
    333 
    334 	rc = rf_mutex_init(&rf_sparet_wait_mutex);
    335 	if (rc) {
    336 		RF_PANIC();
    337 	}
    338 
    339 	rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
    340 
    341 	for (i = 0; i < num; i++)
    342 		raidPtrs[i] = NULL;
    343 	rc = rf_BootRaidframe();
    344 	if (rc == 0)
    345 		printf("Kernelized RAIDframe activated\n");
    346 	else
    347 		panic("Serious error booting RAID!!\n");
    348 
    349 	/* put together some datastructures like the CCD device does.. This
    350 	 * lets us lock the device and what-not when it gets opened. */
    351 
    352 	raid_softc = (struct raid_softc *)
    353 		malloc(num * sizeof(struct raid_softc),
    354 		       M_RAIDFRAME, M_NOWAIT);
    355 	if (raid_softc == NULL) {
    356 		printf("WARNING: no memory for RAIDframe driver\n");
    357 		return;
    358 	}
    359 
    360 	bzero(raid_softc, num * sizeof(struct raid_softc));
    361 
    362 	raidrootdev = (struct device *)malloc(num * sizeof(struct device),
    363 					      M_RAIDFRAME, M_NOWAIT);
    364 	if (raidrootdev == NULL) {
    365 		panic("No memory for RAIDframe driver!!?!?!\n");
    366 	}
    367 
    368 	for (raidID = 0; raidID < num; raidID++) {
    369 		BUFQ_INIT(&raid_softc[raidID].buf_queue);
    370 
    371 		raidrootdev[raidID].dv_class  = DV_DISK;
    372 		raidrootdev[raidID].dv_cfdata = NULL;
    373 		raidrootdev[raidID].dv_unit   = raidID;
    374 		raidrootdev[raidID].dv_parent = NULL;
    375 		raidrootdev[raidID].dv_flags  = 0;
    376 		sprintf(raidrootdev[raidID].dv_xname,"raid%d",raidID);
    377 
    378 		RF_Calloc(raidPtrs[raidID], 1, sizeof(RF_Raid_t),
    379 			  (RF_Raid_t *));
    380 		if (raidPtrs[raidID] == NULL) {
    381 			printf("WARNING: raidPtrs[%d] is NULL\n", raidID);
    382 			numraid = raidID;
    383 			return;
    384 		}
    385 	}
    386 
    387 if (raidautoconfig) {
    388 	/* 1. locate all RAID components on the system */
    389 
    390 #if DEBUG
    391 	printf("Searching for raid components...\n");
    392 #endif
    393 	ac_list = rf_find_raid_components();
    394 
    395 	/* 2. sort them into their respective sets */
    396 
    397 	config_sets = rf_create_auto_sets(ac_list);
    398 
    399 	/* 3. evaluate each set and configure the valid ones
    400 	   This gets done in rf_buildroothack() */
    401 
    402 	/* schedule the creation of the thread to do the
    403 	   "/ on RAID" stuff */
    404 
    405 	kthread_create(rf_buildroothack,config_sets);
    406 
    407 	/* 4. make sure we get our mud.. I mean root.. hooks in.. */
    408 	/* XXXX pick raid0 for now... and this should be only done
    409 	   if we find something that's bootable!!! */
    410 #if 0
    411 	mountroothook_establish(rf_mountroot_hook, &raidrootdev[0]);
    412 #endif
    413 	if (boothowto & RB_ASKNAME) {
    414 		/* We don't auto-config... */
    415 	} else {
    416 		/* They didn't ask, and we found something bootable... */
    417 		/* XXX pretend for now.. */
    418 #if 0
    419  		booted_device = &raidrootdev[0];
    420 #endif
    421 	}
    422 }
    423 
    424 }
    425 
    426 void
    427 rf_buildroothack(arg)
    428 	void *arg;
    429 {
    430 	RF_ConfigSet_t *config_sets = arg;
    431 	RF_ConfigSet_t *cset;
    432 	RF_ConfigSet_t *next_cset;
    433 	int retcode;
    434 	int raidID;
    435 	int rootID;
    436 	int num_root;
    437 
    438 	num_root = 0;
    439 	cset = config_sets;
    440 	while(cset != NULL ) {
    441 		next_cset = cset->next;
    442 		if (rf_have_enough_components(cset) &&
    443 		    cset->ac->clabel->autoconfigure==1) {
    444 			retcode = rf_auto_config_set(cset,&raidID);
    445 			if (!retcode) {
    446 				if (cset->rootable) {
    447 					rootID = raidID;
    448 					num_root++;
    449 				}
    450 			} else {
    451 				/* The autoconfig didn't work :( */
    452 #if DEBUG
    453 				printf("Autoconfig failed with code %d for raid%d\n", retcode, raidID);
    454 #endif
    455 				rf_release_all_vps(cset);
    456 #if DEBUG
    457 				printf("Done cleanup\n");
    458 #endif
    459 			}
    460 		} else {
    461 			/* we're not autoconfiguring this set...
    462 			   release the associated resources */
    463 #if DEBUG
    464 			printf("Releasing vp's\n");
    465 #endif
    466 			rf_release_all_vps(cset);
    467 #if DEBUG
    468 			printf("Done.\n");
    469 #endif
    470 		}
    471 		/* cleanup */
    472 #if DEBUG
    473 		printf("Cleaning up config set\n");
    474 #endif
    475 		rf_cleanup_config_set(cset);
    476 #if DEBUG
    477 		printf("Done cleanup\n");
    478 #endif
    479 		cset = next_cset;
    480 	}
    481 	if (boothowto & RB_ASKNAME) {
    482 		/* We don't auto-config... */
    483 	} else {
    484 		/* They didn't ask, and we found something bootable... */
    485 		/* XXX pretend for now.. */
    486 		if (num_root == 1) {
    487 #if 1
    488 			booted_device = &raidrootdev[rootID];
    489 #endif
    490 		} else if (num_root > 1) {
    491 			/* we can't guess.. require the user to answer... */
    492 			boothowto |= RB_ASKNAME;
    493 		}
    494 	}
    495 }
    496 
    497 
    498 int
    499 raidsize(dev)
    500 	dev_t   dev;
    501 {
    502 	struct raid_softc *rs;
    503 	struct disklabel *lp;
    504 	int     part, unit, omask, size;
    505 
    506 	unit = raidunit(dev);
    507 	if (unit >= numraid)
    508 		return (-1);
    509 	rs = &raid_softc[unit];
    510 
    511 	if ((rs->sc_flags & RAIDF_INITED) == 0)
    512 		return (-1);
    513 
    514 	part = DISKPART(dev);
    515 	omask = rs->sc_dkdev.dk_openmask & (1 << part);
    516 	lp = rs->sc_dkdev.dk_label;
    517 
    518 	if (omask == 0 && raidopen(dev, 0, S_IFBLK, curproc))
    519 		return (-1);
    520 
    521 	if (lp->d_partitions[part].p_fstype != FS_SWAP)
    522 		size = -1;
    523 	else
    524 		size = lp->d_partitions[part].p_size *
    525 		    (lp->d_secsize / DEV_BSIZE);
    526 
    527 	if (omask == 0 && raidclose(dev, 0, S_IFBLK, curproc))
    528 		return (-1);
    529 
    530 	return (size);
    531 
    532 }
    533 
    534 int
    535 raiddump(dev, blkno, va, size)
    536 	dev_t   dev;
    537 	daddr_t blkno;
    538 	caddr_t va;
    539 	size_t  size;
    540 {
    541 	/* Not implemented. */
    542 	return ENXIO;
    543 }
    544 /* ARGSUSED */
    545 int
    546 raidopen(dev, flags, fmt, p)
    547 	dev_t   dev;
    548 	int     flags, fmt;
    549 	struct proc *p;
    550 {
    551 	int     unit = raidunit(dev);
    552 	struct raid_softc *rs;
    553 	struct disklabel *lp;
    554 	int     part, pmask;
    555 	int     error = 0;
    556 
    557 	if (unit >= numraid)
    558 		return (ENXIO);
    559 	rs = &raid_softc[unit];
    560 
    561 	if ((error = raidlock(rs)) != 0)
    562 		return (error);
    563 	lp = rs->sc_dkdev.dk_label;
    564 
    565 	part = DISKPART(dev);
    566 	pmask = (1 << part);
    567 
    568 	db1_printf(("Opening raid device number: %d partition: %d\n",
    569 		unit, part));
    570 
    571 
    572 	if ((rs->sc_flags & RAIDF_INITED) &&
    573 	    (rs->sc_dkdev.dk_openmask == 0))
    574 		raidgetdisklabel(dev);
    575 
    576 	/* make sure that this partition exists */
    577 
    578 	if (part != RAW_PART) {
    579 		db1_printf(("Not a raw partition..\n"));
    580 		if (((rs->sc_flags & RAIDF_INITED) == 0) ||
    581 		    ((part >= lp->d_npartitions) ||
    582 			(lp->d_partitions[part].p_fstype == FS_UNUSED))) {
    583 			error = ENXIO;
    584 			raidunlock(rs);
    585 			db1_printf(("Bailing out...\n"));
    586 			return (error);
    587 		}
    588 	}
    589 	/* Prevent this unit from being unconfigured while open. */
    590 	switch (fmt) {
    591 	case S_IFCHR:
    592 		rs->sc_dkdev.dk_copenmask |= pmask;
    593 		break;
    594 
    595 	case S_IFBLK:
    596 		rs->sc_dkdev.dk_bopenmask |= pmask;
    597 		break;
    598 	}
    599 
    600 	if ((rs->sc_dkdev.dk_openmask == 0) &&
    601 	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
    602 		/* First one... mark things as dirty... Note that we *MUST*
    603 		 have done a configure before this.  I DO NOT WANT TO BE
    604 		 SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
    605 		 THAT THEY BELONG TOGETHER!!!!! */
    606 		/* XXX should check to see if we're only open for reading
    607 		   here... If so, we needn't do this, but then need some
    608 		   other way of keeping track of what's happened.. */
    609 
    610 		rf_markalldirty( raidPtrs[unit] );
    611 	}
    612 
    613 
    614 	rs->sc_dkdev.dk_openmask =
    615 	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
    616 
    617 	raidunlock(rs);
    618 
    619 	return (error);
    620 
    621 
    622 }
    623 /* ARGSUSED */
    624 int
    625 raidclose(dev, flags, fmt, p)
    626 	dev_t   dev;
    627 	int     flags, fmt;
    628 	struct proc *p;
    629 {
    630 	int     unit = raidunit(dev);
    631 	struct raid_softc *rs;
    632 	int     error = 0;
    633 	int     part;
    634 
    635 	if (unit >= numraid)
    636 		return (ENXIO);
    637 	rs = &raid_softc[unit];
    638 
    639 	if ((error = raidlock(rs)) != 0)
    640 		return (error);
    641 
    642 	part = DISKPART(dev);
    643 
    644 	/* ...that much closer to allowing unconfiguration... */
    645 	switch (fmt) {
    646 	case S_IFCHR:
    647 		rs->sc_dkdev.dk_copenmask &= ~(1 << part);
    648 		break;
    649 
    650 	case S_IFBLK:
    651 		rs->sc_dkdev.dk_bopenmask &= ~(1 << part);
    652 		break;
    653 	}
    654 	rs->sc_dkdev.dk_openmask =
    655 	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
    656 
    657 	if ((rs->sc_dkdev.dk_openmask == 0) &&
    658 	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
    659 		/* Last one... device is not unconfigured yet.
    660 		   Device shutdown has taken care of setting the
    661 		   clean bits if RAIDF_INITED is not set
    662 		   mark things as clean... */
    663 #ifdef DEBUG
    664 		printf("Last one on raid%d.  Updating status.\n",unit);
    665 #endif
    666 		rf_update_component_labels( raidPtrs[unit] );
    667 	}
    668 
    669 	raidunlock(rs);
    670 	return (0);
    671 
    672 }
    673 
    674 void
    675 raidstrategy(bp)
    676 	register struct buf *bp;
    677 {
    678 	register int s;
    679 
    680 	unsigned int raidID = raidunit(bp->b_dev);
    681 	RF_Raid_t *raidPtr;
    682 	struct raid_softc *rs = &raid_softc[raidID];
    683 	struct disklabel *lp;
    684 	int     wlabel;
    685 
    686 	if ((rs->sc_flags & RAIDF_INITED) ==0) {
    687 		bp->b_error = ENXIO;
    688 		bp->b_flags = B_ERROR;
    689 		bp->b_resid = bp->b_bcount;
    690 		biodone(bp);
    691 		return;
    692 	}
    693 	if (raidID >= numraid || !raidPtrs[raidID]) {
    694 		bp->b_error = ENODEV;
    695 		bp->b_flags |= B_ERROR;
    696 		bp->b_resid = bp->b_bcount;
    697 		biodone(bp);
    698 		return;
    699 	}
    700 	raidPtr = raidPtrs[raidID];
    701 	if (!raidPtr->valid) {
    702 		bp->b_error = ENODEV;
    703 		bp->b_flags |= B_ERROR;
    704 		bp->b_resid = bp->b_bcount;
    705 		biodone(bp);
    706 		return;
    707 	}
    708 	if (bp->b_bcount == 0) {
    709 		db1_printf(("b_bcount is zero..\n"));
    710 		biodone(bp);
    711 		return;
    712 	}
    713 	lp = rs->sc_dkdev.dk_label;
    714 
    715 	/*
    716 	 * Do bounds checking and adjust transfer.  If there's an
    717 	 * error, the bounds check will flag that for us.
    718 	 */
    719 
    720 	wlabel = rs->sc_flags & (RAIDF_WLABEL | RAIDF_LABELLING);
    721 	if (DISKPART(bp->b_dev) != RAW_PART)
    722 		if (bounds_check_with_label(bp, lp, wlabel) <= 0) {
    723 			db1_printf(("Bounds check failed!!:%d %d\n",
    724 				(int) bp->b_blkno, (int) wlabel));
    725 			biodone(bp);
    726 			return;
    727 		}
    728 	s = splbio();
    729 
    730 	bp->b_resid = 0;
    731 
    732 	/* stuff it onto our queue */
    733 	BUFQ_INSERT_TAIL(&rs->buf_queue, bp);
    734 
    735 	raidstart(raidPtrs[raidID]);
    736 
    737 	splx(s);
    738 }
    739 /* ARGSUSED */
    740 int
    741 raidread(dev, uio, flags)
    742 	dev_t   dev;
    743 	struct uio *uio;
    744 	int     flags;
    745 {
    746 	int     unit = raidunit(dev);
    747 	struct raid_softc *rs;
    748 	int     part;
    749 
    750 	if (unit >= numraid)
    751 		return (ENXIO);
    752 	rs = &raid_softc[unit];
    753 
    754 	if ((rs->sc_flags & RAIDF_INITED) == 0)
    755 		return (ENXIO);
    756 	part = DISKPART(dev);
    757 
    758 	db1_printf(("raidread: unit: %d partition: %d\n", unit, part));
    759 
    760 	return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));
    761 
    762 }
    763 /* ARGSUSED */
    764 int
    765 raidwrite(dev, uio, flags)
    766 	dev_t   dev;
    767 	struct uio *uio;
    768 	int     flags;
    769 {
    770 	int     unit = raidunit(dev);
    771 	struct raid_softc *rs;
    772 
    773 	if (unit >= numraid)
    774 		return (ENXIO);
    775 	rs = &raid_softc[unit];
    776 
    777 	if ((rs->sc_flags & RAIDF_INITED) == 0)
    778 		return (ENXIO);
    779 	db1_printf(("raidwrite\n"));
    780 	return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));
    781 
    782 }
    783 
    784 int
    785 raidioctl(dev, cmd, data, flag, p)
    786 	dev_t   dev;
    787 	u_long  cmd;
    788 	caddr_t data;
    789 	int     flag;
    790 	struct proc *p;
    791 {
    792 	int     unit = raidunit(dev);
    793 	int     error = 0;
    794 	int     part, pmask;
    795 	struct raid_softc *rs;
    796 	RF_Config_t *k_cfg, *u_cfg;
    797 	RF_Raid_t *raidPtr;
    798 	RF_RaidDisk_t *diskPtr;
    799 	RF_AccTotals_t *totals;
    800 	RF_DeviceConfig_t *d_cfg, **ucfgp;
    801 	u_char *specific_buf;
    802 	int retcode = 0;
    803 	int row;
    804 	int column;
    805 	struct rf_recon_req *rrcopy, *rr;
    806 	RF_ComponentLabel_t *clabel;
    807 	RF_ComponentLabel_t ci_label;
    808 	RF_ComponentLabel_t **clabel_ptr;
    809 	RF_SingleComponent_t *sparePtr,*componentPtr;
    810 	RF_SingleComponent_t hot_spare;
    811 	RF_SingleComponent_t component;
    812 	int i, j, d;
    813 
    814 	if (unit >= numraid)
    815 		return (ENXIO);
    816 	rs = &raid_softc[unit];
    817 	raidPtr = raidPtrs[unit];
    818 
    819 	db1_printf(("raidioctl: %d %d %d %d\n", (int) dev,
    820 		(int) DISKPART(dev), (int) unit, (int) cmd));
    821 
    822 	/* Must be open for writes for these commands... */
    823 	switch (cmd) {
    824 	case DIOCSDINFO:
    825 	case DIOCWDINFO:
    826 	case DIOCWLABEL:
    827 		if ((flag & FWRITE) == 0)
    828 			return (EBADF);
    829 	}
    830 
    831 	/* Must be initialized for these... */
    832 	switch (cmd) {
    833 	case DIOCGDINFO:
    834 	case DIOCSDINFO:
    835 	case DIOCWDINFO:
    836 	case DIOCGPART:
    837 	case DIOCWLABEL:
    838 	case DIOCGDEFLABEL:
    839 	case RAIDFRAME_SHUTDOWN:
    840 	case RAIDFRAME_REWRITEPARITY:
    841 	case RAIDFRAME_GET_INFO:
    842 	case RAIDFRAME_RESET_ACCTOTALS:
    843 	case RAIDFRAME_GET_ACCTOTALS:
    844 	case RAIDFRAME_KEEP_ACCTOTALS:
    845 	case RAIDFRAME_GET_SIZE:
    846 	case RAIDFRAME_FAIL_DISK:
    847 	case RAIDFRAME_COPYBACK:
    848 	case RAIDFRAME_CHECK_RECON_STATUS:
    849 	case RAIDFRAME_GET_COMPONENT_LABEL:
    850 	case RAIDFRAME_SET_COMPONENT_LABEL:
    851 	case RAIDFRAME_ADD_HOT_SPARE:
    852 	case RAIDFRAME_REMOVE_HOT_SPARE:
    853 	case RAIDFRAME_INIT_LABELS:
    854 	case RAIDFRAME_REBUILD_IN_PLACE:
    855 	case RAIDFRAME_CHECK_PARITY:
    856 	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
    857 	case RAIDFRAME_CHECK_COPYBACK_STATUS:
    858 	case RAIDFRAME_SET_AUTOCONFIG:
    859 	case RAIDFRAME_SET_ROOT:
    860 		if ((rs->sc_flags & RAIDF_INITED) == 0)
    861 			return (ENXIO);
    862 	}
    863 
    864 	switch (cmd) {
    865 
    866 		/* configure the system */
    867 	case RAIDFRAME_CONFIGURE:
    868 
    869 		if (raidPtr->valid) {
    870 			/* There is a valid RAID set running on this unit! */
    871 			printf("raid%d: Device already configured!\n",unit);
    872 		}
    873 
    874 		/* copy-in the configuration information */
    875 		/* data points to a pointer to the configuration structure */
    876 
    877 		u_cfg = *((RF_Config_t **) data);
    878 		RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *));
    879 		if (k_cfg == NULL) {
    880 			return (ENOMEM);
    881 		}
    882 		retcode = copyin((caddr_t) u_cfg, (caddr_t) k_cfg,
    883 		    sizeof(RF_Config_t));
    884 		if (retcode) {
    885 			RF_Free(k_cfg, sizeof(RF_Config_t));
    886 			db1_printf(("rf_ioctl: retcode=%d copyin.1\n",
    887 				retcode));
    888 			return (retcode);
    889 		}
    890 		/* allocate a buffer for the layout-specific data, and copy it
    891 		 * in */
    892 		if (k_cfg->layoutSpecificSize) {
    893 			if (k_cfg->layoutSpecificSize > 10000) {
    894 				/* sanity check */
    895 				RF_Free(k_cfg, sizeof(RF_Config_t));
    896 				return (EINVAL);
    897 			}
    898 			RF_Malloc(specific_buf, k_cfg->layoutSpecificSize,
    899 			    (u_char *));
    900 			if (specific_buf == NULL) {
    901 				RF_Free(k_cfg, sizeof(RF_Config_t));
    902 				return (ENOMEM);
    903 			}
    904 			retcode = copyin(k_cfg->layoutSpecific,
    905 			    (caddr_t) specific_buf,
    906 			    k_cfg->layoutSpecificSize);
    907 			if (retcode) {
    908 				RF_Free(k_cfg, sizeof(RF_Config_t));
    909 				RF_Free(specific_buf,
    910 					k_cfg->layoutSpecificSize);
    911 				db1_printf(("rf_ioctl: retcode=%d copyin.2\n",
    912 					retcode));
    913 				return (retcode);
    914 			}
    915 		} else
    916 			specific_buf = NULL;
    917 		k_cfg->layoutSpecific = specific_buf;
    918 
    919 		/* should do some kind of sanity check on the configuration.
    920 		 * Store the sum of all the bytes in the last byte? */
    921 
    922 		/* configure the system */
    923 
    924 		/*
    925 		 * Clear the entire RAID descriptor, just to make sure
    926 		 *  there is no stale data left in the case of a
    927 		 *  reconfiguration
    928 		 */
    929 		bzero((char *) raidPtr, sizeof(RF_Raid_t));
    930 		raidPtr->raidid = unit;
    931 
    932 		retcode = rf_Configure(raidPtr, k_cfg, NULL);
    933 
    934 		if (retcode == 0) {
    935 
    936 			/* allow this many simultaneous IO's to
    937 			   this RAID device */
    938 			raidPtr->openings = RAIDOUTSTANDING;
    939 
    940 			retcode = raidinit(dev, raidPtr, unit);
    941 			rf_markalldirty( raidPtr );
    942 		}
    943 		/* free the buffers.  No return code here. */
    944 		if (k_cfg->layoutSpecificSize) {
    945 			RF_Free(specific_buf, k_cfg->layoutSpecificSize);
    946 		}
    947 		RF_Free(k_cfg, sizeof(RF_Config_t));
    948 
    949 		return (retcode);
    950 
    951 		/* shutdown the system */
    952 	case RAIDFRAME_SHUTDOWN:
    953 
    954 		if ((error = raidlock(rs)) != 0)
    955 			return (error);
    956 
    957 		/*
    958 		 * If somebody has a partition mounted, we shouldn't
    959 		 * shutdown.
    960 		 */
    961 
    962 		part = DISKPART(dev);
    963 		pmask = (1 << part);
    964 		if ((rs->sc_dkdev.dk_openmask & ~pmask) ||
    965 		    ((rs->sc_dkdev.dk_bopenmask & pmask) &&
    966 			(rs->sc_dkdev.dk_copenmask & pmask))) {
    967 			raidunlock(rs);
    968 			return (EBUSY);
    969 		}
    970 
    971 		retcode = rf_Shutdown(raidPtr);
    972 
    973 		pool_destroy(&rs->sc_cbufpool);
    974 
    975 		/* It's no longer initialized... */
    976 		rs->sc_flags &= ~RAIDF_INITED;
    977 
    978 		/* Detach the disk. */
    979 		disk_detach(&rs->sc_dkdev);
    980 
    981 		raidunlock(rs);
    982 
    983 		return (retcode);
    984 	case RAIDFRAME_GET_COMPONENT_LABEL:
    985 		clabel_ptr = (RF_ComponentLabel_t **) data;
    986 		/* need to read the component label for the disk indicated
    987 		   by row,column in clabel */
    988 
    989 		/* For practice, let's get it directly fromdisk, rather
    990 		   than from the in-core copy */
    991 		RF_Malloc( clabel, sizeof( RF_ComponentLabel_t ),
    992 			   (RF_ComponentLabel_t *));
    993 		if (clabel == NULL)
    994 			return (ENOMEM);
    995 
    996 		bzero((char *) clabel, sizeof(RF_ComponentLabel_t));
    997 
    998 		retcode = copyin( *clabel_ptr, clabel,
    999 				  sizeof(RF_ComponentLabel_t));
   1000 
   1001 		if (retcode) {
   1002 			RF_Free( clabel, sizeof(RF_ComponentLabel_t));
   1003 			return(retcode);
   1004 		}
   1005 
   1006 		row = clabel->row;
   1007 		column = clabel->column;
   1008 
   1009 		if ((row < 0) || (row >= raidPtr->numRow) ||
   1010 		    (column < 0) || (column >= raidPtr->numCol)) {
   1011 			RF_Free( clabel, sizeof(RF_ComponentLabel_t));
   1012 			return(EINVAL);
   1013 		}
   1014 
   1015 		raidread_component_label(raidPtr->Disks[row][column].dev,
   1016 				raidPtr->raid_cinfo[row][column].ci_vp,
   1017 				clabel );
   1018 
   1019 		retcode = copyout((caddr_t) clabel,
   1020 				  (caddr_t) *clabel_ptr,
   1021 				  sizeof(RF_ComponentLabel_t));
   1022 		RF_Free( clabel, sizeof(RF_ComponentLabel_t));
   1023 		return (retcode);
   1024 
   1025 	case RAIDFRAME_SET_COMPONENT_LABEL:
   1026 		clabel = (RF_ComponentLabel_t *) data;
   1027 
   1028 		/* XXX check the label for valid stuff... */
   1029 		/* Note that some things *should not* get modified --
   1030 		   the user should be re-initing the labels instead of
   1031 		   trying to patch things.
   1032 		   */
   1033 
   1034 		printf("Got component label:\n");
   1035 		printf("Version: %d\n",clabel->version);
   1036 		printf("Serial Number: %d\n",clabel->serial_number);
   1037 		printf("Mod counter: %d\n",clabel->mod_counter);
   1038 		printf("Row: %d\n", clabel->row);
   1039 		printf("Column: %d\n", clabel->column);
   1040 		printf("Num Rows: %d\n", clabel->num_rows);
   1041 		printf("Num Columns: %d\n", clabel->num_columns);
   1042 		printf("Clean: %d\n", clabel->clean);
   1043 		printf("Status: %d\n", clabel->status);
   1044 
   1045 		row = clabel->row;
   1046 		column = clabel->column;
   1047 
   1048 		if ((row < 0) || (row >= raidPtr->numRow) ||
   1049 		    (column < 0) || (column >= raidPtr->numCol)) {
   1050 			return(EINVAL);
   1051 		}
   1052 
   1053 		/* XXX this isn't allowed to do anything for now :-) */
   1054 
   1055 		/* XXX and before it is, we need to fill in the rest
   1056 		   of the fields!?!?!?! */
   1057 #if 0
   1058 		raidwrite_component_label(
   1059                             raidPtr->Disks[row][column].dev,
   1060 			    raidPtr->raid_cinfo[row][column].ci_vp,
   1061 			    clabel );
   1062 #endif
   1063 		return (0);
   1064 
   1065 	case RAIDFRAME_INIT_LABELS:
   1066 		clabel = (RF_ComponentLabel_t *) data;
   1067 		/*
   1068 		   we only want the serial number from
   1069 		   the above.  We get all the rest of the information
   1070 		   from the config that was used to create this RAID
   1071 		   set.
   1072 		   */
   1073 
   1074 		raidPtr->serial_number = clabel->serial_number;
   1075 
   1076 		raid_init_component_label(raidPtr, &ci_label);
   1077 		ci_label.serial_number = clabel->serial_number;
   1078 
   1079 		for(row=0;row<raidPtr->numRow;row++) {
   1080 			ci_label.row = row;
   1081 			for(column=0;column<raidPtr->numCol;column++) {
   1082 				diskPtr = &raidPtr->Disks[row][column];
   1083 				ci_label.partitionSize = diskPtr->partitionSize;
   1084 				ci_label.column = column;
   1085 				raidwrite_component_label(
   1086 				  raidPtr->Disks[row][column].dev,
   1087 				  raidPtr->raid_cinfo[row][column].ci_vp,
   1088 				  &ci_label );
   1089 			}
   1090 		}
   1091 
   1092 		return (retcode);
   1093 	case RAIDFRAME_SET_AUTOCONFIG:
   1094 		d = rf_set_autoconfig(raidPtr, *data);
   1095 		printf("New autoconfig value is: %d\n", d);
   1096 		*data = d;
   1097 		return (retcode);
   1098 
   1099 	case RAIDFRAME_SET_ROOT:
   1100 		d = rf_set_rootpartition(raidPtr, *data);
   1101 		printf("New rootpartition value is: %d\n", d);
   1102 		*data = d;
   1103 		return (retcode);
   1104 
   1105 		/* initialize all parity */
   1106 	case RAIDFRAME_REWRITEPARITY:
   1107 
   1108 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1109 			/* Parity for RAID 0 is trivially correct */
   1110 			raidPtr->parity_good = RF_RAID_CLEAN;
   1111 			return(0);
   1112 		}
   1113 
   1114 		if (raidPtr->parity_rewrite_in_progress == 1) {
   1115 			/* Re-write is already in progress! */
   1116 			return(EINVAL);
   1117 		}
   1118 
   1119 		retcode = RF_CREATE_THREAD(raidPtr->parity_rewrite_thread,
   1120 					   rf_RewriteParityThread,
   1121 					   raidPtr,"raid_parity");
   1122 		return (retcode);
   1123 
   1124 
   1125 	case RAIDFRAME_ADD_HOT_SPARE:
   1126 		sparePtr = (RF_SingleComponent_t *) data;
   1127 		memcpy( &hot_spare, sparePtr, sizeof(RF_SingleComponent_t));
   1128 		printf("Adding spare\n");
   1129 		retcode = rf_add_hot_spare(raidPtr, &hot_spare);
   1130 		return(retcode);
   1131 
   1132 	case RAIDFRAME_REMOVE_HOT_SPARE:
   1133 		return(retcode);
   1134 
   1135 	case RAIDFRAME_REBUILD_IN_PLACE:
   1136 
   1137 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1138 			/* Can't do this on a RAID 0!! */
   1139 			return(EINVAL);
   1140 		}
   1141 
   1142 		if (raidPtr->recon_in_progress == 1) {
   1143 			/* a reconstruct is already in progress! */
   1144 			return(EINVAL);
   1145 		}
   1146 
   1147 		componentPtr = (RF_SingleComponent_t *) data;
   1148 		memcpy( &component, componentPtr,
   1149 			sizeof(RF_SingleComponent_t));
   1150 		row = component.row;
   1151 		column = component.column;
   1152 		printf("Rebuild: %d %d\n",row, column);
   1153 		if ((row < 0) || (row >= raidPtr->numRow) ||
   1154 		    (column < 0) || (column >= raidPtr->numCol)) {
   1155 			return(EINVAL);
   1156 		}
   1157 
   1158 		RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
   1159 		if (rrcopy == NULL)
   1160 			return(ENOMEM);
   1161 
   1162 		rrcopy->raidPtr = (void *) raidPtr;
   1163 		rrcopy->row = row;
   1164 		rrcopy->col = column;
   1165 
   1166 		retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
   1167 					   rf_ReconstructInPlaceThread,
   1168 					   rrcopy,"raid_reconip");
   1169 		return(retcode);
   1170 
   1171 	case RAIDFRAME_GET_INFO:
   1172 		if (!raidPtr->valid)
   1173 			return (ENODEV);
   1174 		ucfgp = (RF_DeviceConfig_t **) data;
   1175 		RF_Malloc(d_cfg, sizeof(RF_DeviceConfig_t),
   1176 			  (RF_DeviceConfig_t *));
   1177 		if (d_cfg == NULL)
   1178 			return (ENOMEM);
   1179 		bzero((char *) d_cfg, sizeof(RF_DeviceConfig_t));
   1180 		d_cfg->rows = raidPtr->numRow;
   1181 		d_cfg->cols = raidPtr->numCol;
   1182 		d_cfg->ndevs = raidPtr->numRow * raidPtr->numCol;
   1183 		if (d_cfg->ndevs >= RF_MAX_DISKS) {
   1184 			RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
   1185 			return (ENOMEM);
   1186 		}
   1187 		d_cfg->nspares = raidPtr->numSpare;
   1188 		if (d_cfg->nspares >= RF_MAX_DISKS) {
   1189 			RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
   1190 			return (ENOMEM);
   1191 		}
   1192 		d_cfg->maxqdepth = raidPtr->maxQueueDepth;
   1193 		d = 0;
   1194 		for (i = 0; i < d_cfg->rows; i++) {
   1195 			for (j = 0; j < d_cfg->cols; j++) {
   1196 				d_cfg->devs[d] = raidPtr->Disks[i][j];
   1197 				d++;
   1198 			}
   1199 		}
   1200 		for (j = d_cfg->cols, i = 0; i < d_cfg->nspares; i++, j++) {
   1201 			d_cfg->spares[i] = raidPtr->Disks[0][j];
   1202 		}
   1203 		retcode = copyout((caddr_t) d_cfg, (caddr_t) * ucfgp,
   1204 				  sizeof(RF_DeviceConfig_t));
   1205 		RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
   1206 
   1207 		return (retcode);
   1208 
   1209 	case RAIDFRAME_CHECK_PARITY:
   1210 		*(int *) data = raidPtr->parity_good;
   1211 		return (0);
   1212 
   1213 	case RAIDFRAME_RESET_ACCTOTALS:
   1214 		bzero(&raidPtr->acc_totals, sizeof(raidPtr->acc_totals));
   1215 		return (0);
   1216 
   1217 	case RAIDFRAME_GET_ACCTOTALS:
   1218 		totals = (RF_AccTotals_t *) data;
   1219 		*totals = raidPtr->acc_totals;
   1220 		return (0);
   1221 
   1222 	case RAIDFRAME_KEEP_ACCTOTALS:
   1223 		raidPtr->keep_acc_totals = *(int *)data;
   1224 		return (0);
   1225 
   1226 	case RAIDFRAME_GET_SIZE:
   1227 		*(int *) data = raidPtr->totalSectors;
   1228 		return (0);
   1229 
   1230 		/* fail a disk & optionally start reconstruction */
   1231 	case RAIDFRAME_FAIL_DISK:
   1232 
   1233 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1234 			/* Can't do this on a RAID 0!! */
   1235 			return(EINVAL);
   1236 		}
   1237 
   1238 		rr = (struct rf_recon_req *) data;
   1239 
   1240 		if (rr->row < 0 || rr->row >= raidPtr->numRow
   1241 		    || rr->col < 0 || rr->col >= raidPtr->numCol)
   1242 			return (EINVAL);
   1243 
   1244 		printf("raid%d: Failing the disk: row: %d col: %d\n",
   1245 		       unit, rr->row, rr->col);
   1246 
   1247 		/* make a copy of the recon request so that we don't rely on
   1248 		 * the user's buffer */
   1249 		RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
   1250 		if (rrcopy == NULL)
   1251 			return(ENOMEM);
   1252 		bcopy(rr, rrcopy, sizeof(*rr));
   1253 		rrcopy->raidPtr = (void *) raidPtr;
   1254 
   1255 		retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
   1256 					   rf_ReconThread,
   1257 					   rrcopy,"raid_recon");
   1258 		return (0);
   1259 
   1260 		/* invoke a copyback operation after recon on whatever disk
   1261 		 * needs it, if any */
   1262 	case RAIDFRAME_COPYBACK:
   1263 
   1264 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1265 			/* This makes no sense on a RAID 0!! */
   1266 			return(EINVAL);
   1267 		}
   1268 
   1269 		if (raidPtr->copyback_in_progress == 1) {
   1270 			/* Copyback is already in progress! */
   1271 			return(EINVAL);
   1272 		}
   1273 
   1274 		retcode = RF_CREATE_THREAD(raidPtr->copyback_thread,
   1275 					   rf_CopybackThread,
   1276 					   raidPtr,"raid_copyback");
   1277 		return (retcode);
   1278 
   1279 		/* return the percentage completion of reconstruction */
   1280 	case RAIDFRAME_CHECK_RECON_STATUS:
   1281 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1282 			/* This makes no sense on a RAID 0 */
   1283 			return(EINVAL);
   1284 		}
   1285 		row = 0; /* XXX we only consider a single row... */
   1286 		if (raidPtr->status[row] != rf_rs_reconstructing)
   1287 			*(int *) data = 100;
   1288 		else
   1289 			*(int *) data = raidPtr->reconControl[row]->percentComplete;
   1290 		return (0);
   1291 
   1292 	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
   1293 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1294 			/* This makes no sense on a RAID 0 */
   1295 			return(EINVAL);
   1296 		}
   1297 		if (raidPtr->parity_rewrite_in_progress == 1) {
   1298 			*(int *) data = 100 * raidPtr->parity_rewrite_stripes_done / raidPtr->Layout.numStripe;
   1299 		} else {
   1300 			*(int *) data = 100;
   1301 		}
   1302 		return (0);
   1303 
   1304 	case RAIDFRAME_CHECK_COPYBACK_STATUS:
   1305 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1306 			/* This makes no sense on a RAID 0 */
   1307 			return(EINVAL);
   1308 		}
   1309 		if (raidPtr->copyback_in_progress == 1) {
   1310 			*(int *) data = 100 * raidPtr->copyback_stripes_done /
   1311 				raidPtr->Layout.numStripe;
   1312 		} else {
   1313 			*(int *) data = 100;
   1314 		}
   1315 		return (0);
   1316 
   1317 
   1318 		/* the sparetable daemon calls this to wait for the kernel to
   1319 		 * need a spare table. this ioctl does not return until a
   1320 		 * spare table is needed. XXX -- calling mpsleep here in the
   1321 		 * ioctl code is almost certainly wrong and evil. -- XXX XXX
   1322 		 * -- I should either compute the spare table in the kernel,
   1323 		 * or have a different -- XXX XXX -- interface (a different
   1324 		 * character device) for delivering the table     -- XXX */
   1325 #if 0
   1326 	case RAIDFRAME_SPARET_WAIT:
   1327 		RF_LOCK_MUTEX(rf_sparet_wait_mutex);
   1328 		while (!rf_sparet_wait_queue)
   1329 			mpsleep(&rf_sparet_wait_queue, (PZERO + 1) | PCATCH, "sparet wait", 0, (void *) simple_lock_addr(rf_sparet_wait_mutex), MS_LOCK_SIMPLE);
   1330 		waitreq = rf_sparet_wait_queue;
   1331 		rf_sparet_wait_queue = rf_sparet_wait_queue->next;
   1332 		RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
   1333 
   1334 		/* structure assignment */
   1335 		*((RF_SparetWait_t *) data) = *waitreq;
   1336 
   1337 		RF_Free(waitreq, sizeof(*waitreq));
   1338 		return (0);
   1339 
   1340 		/* wakes up a process waiting on SPARET_WAIT and puts an error
   1341 		 * code in it that will cause the dameon to exit */
   1342 	case RAIDFRAME_ABORT_SPARET_WAIT:
   1343 		RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
   1344 		waitreq->fcol = -1;
   1345 		RF_LOCK_MUTEX(rf_sparet_wait_mutex);
   1346 		waitreq->next = rf_sparet_wait_queue;
   1347 		rf_sparet_wait_queue = waitreq;
   1348 		RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
   1349 		wakeup(&rf_sparet_wait_queue);
   1350 		return (0);
   1351 
   1352 		/* used by the spare table daemon to deliver a spare table
   1353 		 * into the kernel */
   1354 	case RAIDFRAME_SEND_SPARET:
   1355 
   1356 		/* install the spare table */
   1357 		retcode = rf_SetSpareTable(raidPtr, *(void **) data);
   1358 
   1359 		/* respond to the requestor.  the return status of the spare
   1360 		 * table installation is passed in the "fcol" field */
   1361 		RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
   1362 		waitreq->fcol = retcode;
   1363 		RF_LOCK_MUTEX(rf_sparet_wait_mutex);
   1364 		waitreq->next = rf_sparet_resp_queue;
   1365 		rf_sparet_resp_queue = waitreq;
   1366 		wakeup(&rf_sparet_resp_queue);
   1367 		RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
   1368 
   1369 		return (retcode);
   1370 #endif
   1371 
   1372 	default:
   1373 		break; /* fall through to the os-specific code below */
   1374 
   1375 	}
   1376 
   1377 	if (!raidPtr->valid)
   1378 		return (EINVAL);
   1379 
   1380 	/*
   1381 	 * Add support for "regular" device ioctls here.
   1382 	 */
   1383 
   1384 	switch (cmd) {
   1385 	case DIOCGDINFO:
   1386 		*(struct disklabel *) data = *(rs->sc_dkdev.dk_label);
   1387 		break;
   1388 
   1389 	case DIOCGPART:
   1390 		((struct partinfo *) data)->disklab = rs->sc_dkdev.dk_label;
   1391 		((struct partinfo *) data)->part =
   1392 		    &rs->sc_dkdev.dk_label->d_partitions[DISKPART(dev)];
   1393 		break;
   1394 
   1395 	case DIOCWDINFO:
   1396 	case DIOCSDINFO:
   1397 		if ((error = raidlock(rs)) != 0)
   1398 			return (error);
   1399 
   1400 		rs->sc_flags |= RAIDF_LABELLING;
   1401 
   1402 		error = setdisklabel(rs->sc_dkdev.dk_label,
   1403 		    (struct disklabel *) data, 0, rs->sc_dkdev.dk_cpulabel);
   1404 		if (error == 0) {
   1405 			if (cmd == DIOCWDINFO)
   1406 				error = writedisklabel(RAIDLABELDEV(dev),
   1407 				    raidstrategy, rs->sc_dkdev.dk_label,
   1408 				    rs->sc_dkdev.dk_cpulabel);
   1409 		}
   1410 		rs->sc_flags &= ~RAIDF_LABELLING;
   1411 
   1412 		raidunlock(rs);
   1413 
   1414 		if (error)
   1415 			return (error);
   1416 		break;
   1417 
   1418 	case DIOCWLABEL:
   1419 		if (*(int *) data != 0)
   1420 			rs->sc_flags |= RAIDF_WLABEL;
   1421 		else
   1422 			rs->sc_flags &= ~RAIDF_WLABEL;
   1423 		break;
   1424 
   1425 	case DIOCGDEFLABEL:
   1426 		raidgetdefaultlabel(raidPtr, rs,
   1427 		    (struct disklabel *) data);
   1428 		break;
   1429 
   1430 	default:
   1431 		retcode = ENOTTY;
   1432 	}
   1433 	return (retcode);
   1434 
   1435 }
   1436 
   1437 
   1438 /* raidinit -- complete the rest of the initialization for the
   1439    RAIDframe device.  */
   1440 
   1441 
   1442 static int
   1443 raidinit(dev, raidPtr, unit)
   1444 	dev_t   dev;
   1445 	RF_Raid_t *raidPtr;
   1446 	int     unit;
   1447 {
   1448 	int     retcode;
   1449 	struct raid_softc *rs;
   1450 
   1451 	retcode = 0;
   1452 
   1453 	rs = &raid_softc[unit];
   1454 	pool_init(&rs->sc_cbufpool, sizeof(struct raidbuf), 0,
   1455 		  0, 0, "raidpl", 0, NULL, NULL, M_RAIDFRAME);
   1456 
   1457 
   1458 	/* XXX should check return code first... */
   1459 	rs->sc_flags |= RAIDF_INITED;
   1460 
   1461 	sprintf(rs->sc_xname, "raid%d", unit);	/* XXX doesn't check bounds. */
   1462 
   1463 	rs->sc_dkdev.dk_name = rs->sc_xname;
   1464 
   1465 	/* disk_attach actually creates space for the CPU disklabel, among
   1466 	 * other things, so it's critical to call this *BEFORE* we try putzing
   1467 	 * with disklabels. */
   1468 
   1469 	disk_attach(&rs->sc_dkdev);
   1470 
   1471 	/* XXX There may be a weird interaction here between this, and
   1472 	 * protectedSectors, as used in RAIDframe.  */
   1473 
   1474 	rs->sc_size = raidPtr->totalSectors;
   1475 	rs->sc_dev = dev;
   1476 
   1477 	return (retcode);
   1478 }
   1479 
   1480 /* wake up the daemon & tell it to get us a spare table
   1481  * XXX
   1482  * the entries in the queues should be tagged with the raidPtr
   1483  * so that in the extremely rare case that two recons happen at once,
   1484  * we know for which device were requesting a spare table
   1485  * XXX
   1486  *
   1487  * XXX This code is not currently used. GO
   1488  */
   1489 int
   1490 rf_GetSpareTableFromDaemon(req)
   1491 	RF_SparetWait_t *req;
   1492 {
   1493 	int     retcode;
   1494 
   1495 	RF_LOCK_MUTEX(rf_sparet_wait_mutex);
   1496 	req->next = rf_sparet_wait_queue;
   1497 	rf_sparet_wait_queue = req;
   1498 	wakeup(&rf_sparet_wait_queue);
   1499 
   1500 	/* mpsleep unlocks the mutex */
   1501 	while (!rf_sparet_resp_queue) {
   1502 		tsleep(&rf_sparet_resp_queue, PRIBIO,
   1503 		    "raidframe getsparetable", 0);
   1504 	}
   1505 	req = rf_sparet_resp_queue;
   1506 	rf_sparet_resp_queue = req->next;
   1507 	RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
   1508 
   1509 	retcode = req->fcol;
   1510 	RF_Free(req, sizeof(*req));	/* this is not the same req as we
   1511 					 * alloc'd */
   1512 	return (retcode);
   1513 }
   1514 
   1515 /* a wrapper around rf_DoAccess that extracts appropriate info from the
   1516  * bp & passes it down.
   1517  * any calls originating in the kernel must use non-blocking I/O
   1518  * do some extra sanity checking to return "appropriate" error values for
   1519  * certain conditions (to make some standard utilities work)
   1520  *
   1521  * Formerly known as: rf_DoAccessKernel
   1522  */
   1523 void
   1524 raidstart(raidPtr)
   1525 	RF_Raid_t *raidPtr;
   1526 {
   1527 	RF_SectorCount_t num_blocks, pb, sum;
   1528 	RF_RaidAddr_t raid_addr;
   1529 	int     retcode;
   1530 	struct partition *pp;
   1531 	daddr_t blocknum;
   1532 	int     unit;
   1533 	struct raid_softc *rs;
   1534 	int     do_async;
   1535 	struct buf *bp;
   1536 
   1537 	unit = raidPtr->raidid;
   1538 	rs = &raid_softc[unit];
   1539 
   1540 	/* quick check to see if anything has died recently */
   1541 	RF_LOCK_MUTEX(raidPtr->mutex);
   1542 	if (raidPtr->numNewFailures > 0) {
   1543 		rf_update_component_labels(raidPtr);
   1544 		raidPtr->numNewFailures--;
   1545 	}
   1546 	RF_UNLOCK_MUTEX(raidPtr->mutex);
   1547 
   1548 	/* Check to see if we're at the limit... */
   1549 	RF_LOCK_MUTEX(raidPtr->mutex);
   1550 	while (raidPtr->openings > 0) {
   1551 		RF_UNLOCK_MUTEX(raidPtr->mutex);
   1552 
   1553 		/* get the next item, if any, from the queue */
   1554 		if ((bp = BUFQ_FIRST(&rs->buf_queue)) == NULL) {
   1555 			/* nothing more to do */
   1556 			return;
   1557 		}
   1558 		BUFQ_REMOVE(&rs->buf_queue, bp);
   1559 
   1560 		/* Ok, for the bp we have here, bp->b_blkno is relative to the
   1561 		 * partition.. Need to make it absolute to the underlying
   1562 		 * device.. */
   1563 
   1564 		blocknum = bp->b_blkno;
   1565 		if (DISKPART(bp->b_dev) != RAW_PART) {
   1566 			pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)];
   1567 			blocknum += pp->p_offset;
   1568 		}
   1569 
   1570 		db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno,
   1571 			    (int) blocknum));
   1572 
   1573 		db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
   1574 		db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));
   1575 
   1576 		/* *THIS* is where we adjust what block we're going to...
   1577 		 * but DO NOT TOUCH bp->b_blkno!!! */
   1578 		raid_addr = blocknum;
   1579 
   1580 		num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
   1581 		pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
   1582 		sum = raid_addr + num_blocks + pb;
   1583 		if (1 || rf_debugKernelAccess) {
   1584 			db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
   1585 				    (int) raid_addr, (int) sum, (int) num_blocks,
   1586 				    (int) pb, (int) bp->b_resid));
   1587 		}
   1588 		if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
   1589 		    || (sum < num_blocks) || (sum < pb)) {
   1590 			bp->b_error = ENOSPC;
   1591 			bp->b_flags |= B_ERROR;
   1592 			bp->b_resid = bp->b_bcount;
   1593 			biodone(bp);
   1594 			RF_LOCK_MUTEX(raidPtr->mutex);
   1595 			continue;
   1596 		}
   1597 		/*
   1598 		 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
   1599 		 */
   1600 
   1601 		if (bp->b_bcount & raidPtr->sectorMask) {
   1602 			bp->b_error = EINVAL;
   1603 			bp->b_flags |= B_ERROR;
   1604 			bp->b_resid = bp->b_bcount;
   1605 			biodone(bp);
   1606 			RF_LOCK_MUTEX(raidPtr->mutex);
   1607 			continue;
   1608 
   1609 		}
   1610 		db1_printf(("Calling DoAccess..\n"));
   1611 
   1612 
   1613 		RF_LOCK_MUTEX(raidPtr->mutex);
   1614 		raidPtr->openings--;
   1615 		RF_UNLOCK_MUTEX(raidPtr->mutex);
   1616 
   1617 		/*
   1618 		 * Everything is async.
   1619 		 */
   1620 		do_async = 1;
   1621 
   1622 		/* don't ever condition on bp->b_flags & B_WRITE.
   1623 		 * always condition on B_READ instead */
   1624 
   1625 		/* XXX we're still at splbio() here... do we *really*
   1626 		   need to be? */
   1627 
   1628 
   1629 		retcode = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
   1630 				      RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
   1631 				      do_async, raid_addr, num_blocks,
   1632 				      bp->b_un.b_addr, bp, NULL, NULL,
   1633 				      RF_DAG_NONBLOCKING_IO, NULL, NULL, NULL);
   1634 
   1635 
   1636 		RF_LOCK_MUTEX(raidPtr->mutex);
   1637 	}
   1638 	RF_UNLOCK_MUTEX(raidPtr->mutex);
   1639 }
   1640 
   1641 
   1642 
   1643 
   1644 /* invoke an I/O from kernel mode.  Disk queue should be locked upon entry */
   1645 
   1646 int
   1647 rf_DispatchKernelIO(queue, req)
   1648 	RF_DiskQueue_t *queue;
   1649 	RF_DiskQueueData_t *req;
   1650 {
   1651 	int     op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
   1652 	struct buf *bp;
   1653 	struct raidbuf *raidbp = NULL;
   1654 	struct raid_softc *rs;
   1655 	int     unit;
   1656 	int s;
   1657 
   1658 	s=0;
   1659 	/* s = splbio();*/ /* want to test this */
   1660 	/* XXX along with the vnode, we also need the softc associated with
   1661 	 * this device.. */
   1662 
   1663 	req->queue = queue;
   1664 
   1665 	unit = queue->raidPtr->raidid;
   1666 
   1667 	db1_printf(("DispatchKernelIO unit: %d\n", unit));
   1668 
   1669 	if (unit >= numraid) {
   1670 		printf("Invalid unit number: %d %d\n", unit, numraid);
   1671 		panic("Invalid Unit number in rf_DispatchKernelIO\n");
   1672 	}
   1673 	rs = &raid_softc[unit];
   1674 
   1675 	/* XXX is this the right place? */
   1676 	disk_busy(&rs->sc_dkdev);
   1677 
   1678 	bp = req->bp;
   1679 #if 1
   1680 	/* XXX when there is a physical disk failure, someone is passing us a
   1681 	 * buffer that contains old stuff!!  Attempt to deal with this problem
   1682 	 * without taking a performance hit... (not sure where the real bug
   1683 	 * is.  It's buried in RAIDframe somewhere) :-(  GO ) */
   1684 
   1685 	if (bp->b_flags & B_ERROR) {
   1686 		bp->b_flags &= ~B_ERROR;
   1687 	}
   1688 	if (bp->b_error != 0) {
   1689 		bp->b_error = 0;
   1690 	}
   1691 #endif
   1692 	raidbp = RAIDGETBUF(rs);
   1693 
   1694 	raidbp->rf_flags = 0;	/* XXX not really used anywhere... */
   1695 
   1696 	/*
   1697 	 * context for raidiodone
   1698 	 */
   1699 	raidbp->rf_obp = bp;
   1700 	raidbp->req = req;
   1701 
   1702 	LIST_INIT(&raidbp->rf_buf.b_dep);
   1703 
   1704 	switch (req->type) {
   1705 	case RF_IO_TYPE_NOP:	/* used primarily to unlock a locked queue */
   1706 		/* XXX need to do something extra here.. */
   1707 		/* I'm leaving this in, as I've never actually seen it used,
   1708 		 * and I'd like folks to report it... GO */
   1709 		printf(("WAKEUP CALLED\n"));
   1710 		queue->numOutstanding++;
   1711 
   1712 		/* XXX need to glue the original buffer into this??  */
   1713 
   1714 		KernelWakeupFunc(&raidbp->rf_buf);
   1715 		break;
   1716 
   1717 	case RF_IO_TYPE_READ:
   1718 	case RF_IO_TYPE_WRITE:
   1719 
   1720 		if (req->tracerec) {
   1721 			RF_ETIMER_START(req->tracerec->timer);
   1722 		}
   1723 		InitBP(&raidbp->rf_buf, queue->rf_cinfo->ci_vp,
   1724 		    op | bp->b_flags, queue->rf_cinfo->ci_dev,
   1725 		    req->sectorOffset, req->numSector,
   1726 		    req->buf, KernelWakeupFunc, (void *) req,
   1727 		    queue->raidPtr->logBytesPerSector, req->b_proc);
   1728 
   1729 		if (rf_debugKernelAccess) {
   1730 			db1_printf(("dispatch: bp->b_blkno = %ld\n",
   1731 				(long) bp->b_blkno));
   1732 		}
   1733 		queue->numOutstanding++;
   1734 		queue->last_deq_sector = req->sectorOffset;
   1735 		/* acc wouldn't have been let in if there were any pending
   1736 		 * reqs at any other priority */
   1737 		queue->curPriority = req->priority;
   1738 
   1739 		db1_printf(("Going for %c to unit %d row %d col %d\n",
   1740 			req->type, unit, queue->row, queue->col));
   1741 		db1_printf(("sector %d count %d (%d bytes) %d\n",
   1742 			(int) req->sectorOffset, (int) req->numSector,
   1743 			(int) (req->numSector <<
   1744 			    queue->raidPtr->logBytesPerSector),
   1745 			(int) queue->raidPtr->logBytesPerSector));
   1746 		if ((raidbp->rf_buf.b_flags & B_READ) == 0) {
   1747 			raidbp->rf_buf.b_vp->v_numoutput++;
   1748 		}
   1749 		VOP_STRATEGY(&raidbp->rf_buf);
   1750 
   1751 		break;
   1752 
   1753 	default:
   1754 		panic("bad req->type in rf_DispatchKernelIO");
   1755 	}
   1756 	db1_printf(("Exiting from DispatchKernelIO\n"));
   1757 	/* splx(s); */ /* want to test this */
   1758 	return (0);
   1759 }
   1760 /* this is the callback function associated with a I/O invoked from
   1761    kernel code.
   1762  */
   1763 static void
   1764 KernelWakeupFunc(vbp)
   1765 	struct buf *vbp;
   1766 {
   1767 	RF_DiskQueueData_t *req = NULL;
   1768 	RF_DiskQueue_t *queue;
   1769 	struct raidbuf *raidbp = (struct raidbuf *) vbp;
   1770 	struct buf *bp;
   1771 	struct raid_softc *rs;
   1772 	int     unit;
   1773 	register int s;
   1774 
   1775 	s = splbio();
   1776 	db1_printf(("recovering the request queue:\n"));
   1777 	req = raidbp->req;
   1778 
   1779 	bp = raidbp->rf_obp;
   1780 
   1781 	queue = (RF_DiskQueue_t *) req->queue;
   1782 
   1783 	if (raidbp->rf_buf.b_flags & B_ERROR) {
   1784 		bp->b_flags |= B_ERROR;
   1785 		bp->b_error = raidbp->rf_buf.b_error ?
   1786 		    raidbp->rf_buf.b_error : EIO;
   1787 	}
   1788 
   1789 	/* XXX methinks this could be wrong... */
   1790 #if 1
   1791 	bp->b_resid = raidbp->rf_buf.b_resid;
   1792 #endif
   1793 
   1794 	if (req->tracerec) {
   1795 		RF_ETIMER_STOP(req->tracerec->timer);
   1796 		RF_ETIMER_EVAL(req->tracerec->timer);
   1797 		RF_LOCK_MUTEX(rf_tracing_mutex);
   1798 		req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
   1799 		req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
   1800 		req->tracerec->num_phys_ios++;
   1801 		RF_UNLOCK_MUTEX(rf_tracing_mutex);
   1802 	}
   1803 	bp->b_bcount = raidbp->rf_buf.b_bcount;	/* XXXX ?? */
   1804 
   1805 	unit = queue->raidPtr->raidid;	/* *Much* simpler :-> */
   1806 
   1807 
   1808 	/* XXX Ok, let's get aggressive... If B_ERROR is set, let's go
   1809 	 * ballistic, and mark the component as hosed... */
   1810 
   1811 	if (bp->b_flags & B_ERROR) {
   1812 		/* Mark the disk as dead */
   1813 		/* but only mark it once... */
   1814 		if (queue->raidPtr->Disks[queue->row][queue->col].status ==
   1815 		    rf_ds_optimal) {
   1816 			printf("raid%d: IO Error.  Marking %s as failed.\n",
   1817 			    unit, queue->raidPtr->Disks[queue->row][queue->col].devname);
   1818 			queue->raidPtr->Disks[queue->row][queue->col].status =
   1819 			    rf_ds_failed;
   1820 			queue->raidPtr->status[queue->row] = rf_rs_degraded;
   1821 			queue->raidPtr->numFailures++;
   1822 			queue->raidPtr->numNewFailures++;
   1823 			/* XXX here we should bump the version number for each component, and write that data out */
   1824 		} else {	/* Disk is already dead... */
   1825 			/* printf("Disk already marked as dead!\n"); */
   1826 		}
   1827 
   1828 	}
   1829 
   1830 	rs = &raid_softc[unit];
   1831 	RAIDPUTBUF(rs, raidbp);
   1832 
   1833 
   1834 	if (bp->b_resid == 0) {
   1835 		/* XXX is this the right place for a disk_unbusy()??!??!?!? */
   1836 		disk_unbusy(&rs->sc_dkdev, (bp->b_bcount - bp->b_resid));
   1837 	}
   1838 
   1839 	rf_DiskIOComplete(queue, req, (bp->b_flags & B_ERROR) ? 1 : 0);
   1840 	(req->CompleteFunc) (req->argument, (bp->b_flags & B_ERROR) ? 1 : 0);
   1841 
   1842 	splx(s);
   1843 }
   1844 
   1845 
   1846 
   1847 /*
   1848  * initialize a buf structure for doing an I/O in the kernel.
   1849  */
   1850 static void
   1851 InitBP(
   1852     struct buf * bp,
   1853     struct vnode * b_vp,
   1854     unsigned rw_flag,
   1855     dev_t dev,
   1856     RF_SectorNum_t startSect,
   1857     RF_SectorCount_t numSect,
   1858     caddr_t buf,
   1859     void (*cbFunc) (struct buf *),
   1860     void *cbArg,
   1861     int logBytesPerSector,
   1862     struct proc * b_proc)
   1863 {
   1864 	/* bp->b_flags       = B_PHYS | rw_flag; */
   1865 	bp->b_flags = B_CALL | rw_flag;	/* XXX need B_PHYS here too??? */
   1866 	bp->b_bcount = numSect << logBytesPerSector;
   1867 	bp->b_bufsize = bp->b_bcount;
   1868 	bp->b_error = 0;
   1869 	bp->b_dev = dev;
   1870 	bp->b_un.b_addr = buf;
   1871 	bp->b_blkno = startSect;
   1872 	bp->b_resid = bp->b_bcount;	/* XXX is this right!??!?!! */
   1873 	if (bp->b_bcount == 0) {
   1874 		panic("bp->b_bcount is zero in InitBP!!\n");
   1875 	}
   1876 	bp->b_proc = b_proc;
   1877 	bp->b_iodone = cbFunc;
   1878 	bp->b_vp = b_vp;
   1879 
   1880 }
   1881 
   1882 static void
   1883 raidgetdefaultlabel(raidPtr, rs, lp)
   1884 	RF_Raid_t *raidPtr;
   1885 	struct raid_softc *rs;
   1886 	struct disklabel *lp;
   1887 {
   1888 	db1_printf(("Building a default label...\n"));
   1889 	bzero(lp, sizeof(*lp));
   1890 
   1891 	/* fabricate a label... */
   1892 	lp->d_secperunit = raidPtr->totalSectors;
   1893 	lp->d_secsize = raidPtr->bytesPerSector;
   1894 	lp->d_nsectors = raidPtr->Layout.dataSectorsPerStripe;
   1895 	lp->d_ntracks = 1;
   1896 	lp->d_ncylinders = raidPtr->totalSectors /
   1897 		(lp->d_nsectors * lp->d_ntracks);
   1898 	lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
   1899 
   1900 	strncpy(lp->d_typename, "raid", sizeof(lp->d_typename));
   1901 	lp->d_type = DTYPE_RAID;
   1902 	strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
   1903 	lp->d_rpm = 3600;
   1904 	lp->d_interleave = 1;
   1905 	lp->d_flags = 0;
   1906 
   1907 	lp->d_partitions[RAW_PART].p_offset = 0;
   1908 	lp->d_partitions[RAW_PART].p_size = raidPtr->totalSectors;
   1909 	lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
   1910 	lp->d_npartitions = RAW_PART + 1;
   1911 
   1912 	lp->d_magic = DISKMAGIC;
   1913 	lp->d_magic2 = DISKMAGIC;
   1914 	lp->d_checksum = dkcksum(rs->sc_dkdev.dk_label);
   1915 
   1916 }
   1917 /*
   1918  * Read the disklabel from the raid device.  If one is not present, fake one
   1919  * up.
   1920  */
   1921 static void
   1922 raidgetdisklabel(dev)
   1923 	dev_t   dev;
   1924 {
   1925 	int     unit = raidunit(dev);
   1926 	struct raid_softc *rs = &raid_softc[unit];
   1927 	char   *errstring;
   1928 	struct disklabel *lp = rs->sc_dkdev.dk_label;
   1929 	struct cpu_disklabel *clp = rs->sc_dkdev.dk_cpulabel;
   1930 	RF_Raid_t *raidPtr;
   1931 
   1932 	db1_printf(("Getting the disklabel...\n"));
   1933 
   1934 	bzero(clp, sizeof(*clp));
   1935 
   1936 	raidPtr = raidPtrs[unit];
   1937 
   1938 	raidgetdefaultlabel(raidPtr, rs, lp);
   1939 
   1940 	/*
   1941 	 * Call the generic disklabel extraction routine.
   1942 	 */
   1943 	errstring = readdisklabel(RAIDLABELDEV(dev), raidstrategy,
   1944 	    rs->sc_dkdev.dk_label, rs->sc_dkdev.dk_cpulabel);
   1945 	if (errstring)
   1946 		raidmakedisklabel(rs);
   1947 	else {
   1948 		int     i;
   1949 		struct partition *pp;
   1950 
   1951 		/*
   1952 		 * Sanity check whether the found disklabel is valid.
   1953 		 *
   1954 		 * This is necessary since total size of the raid device
   1955 		 * may vary when an interleave is changed even though exactly
   1956 		 * same componets are used, and old disklabel may used
   1957 		 * if that is found.
   1958 		 */
   1959 		if (lp->d_secperunit != rs->sc_size)
   1960 			printf("WARNING: %s: "
   1961 			    "total sector size in disklabel (%d) != "
   1962 			    "the size of raid (%ld)\n", rs->sc_xname,
   1963 			    lp->d_secperunit, (long) rs->sc_size);
   1964 		for (i = 0; i < lp->d_npartitions; i++) {
   1965 			pp = &lp->d_partitions[i];
   1966 			if (pp->p_offset + pp->p_size > rs->sc_size)
   1967 				printf("WARNING: %s: end of partition `%c' "
   1968 				    "exceeds the size of raid (%ld)\n",
   1969 				    rs->sc_xname, 'a' + i, (long) rs->sc_size);
   1970 		}
   1971 	}
   1972 
   1973 }
   1974 /*
   1975  * Take care of things one might want to take care of in the event
   1976  * that a disklabel isn't present.
   1977  */
   1978 static void
   1979 raidmakedisklabel(rs)
   1980 	struct raid_softc *rs;
   1981 {
   1982 	struct disklabel *lp = rs->sc_dkdev.dk_label;
   1983 	db1_printf(("Making a label..\n"));
   1984 
   1985 	/*
   1986 	 * For historical reasons, if there's no disklabel present
   1987 	 * the raw partition must be marked FS_BSDFFS.
   1988 	 */
   1989 
   1990 	lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;
   1991 
   1992 	strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));
   1993 
   1994 	lp->d_checksum = dkcksum(lp);
   1995 }
   1996 /*
   1997  * Lookup the provided name in the filesystem.  If the file exists,
   1998  * is a valid block device, and isn't being used by anyone else,
   1999  * set *vpp to the file's vnode.
   2000  * You'll find the original of this in ccd.c
   2001  */
   2002 int
   2003 raidlookup(path, p, vpp)
   2004 	char   *path;
   2005 	struct proc *p;
   2006 	struct vnode **vpp;	/* result */
   2007 {
   2008 	struct nameidata nd;
   2009 	struct vnode *vp;
   2010 	struct vattr va;
   2011 	int     error;
   2012 
   2013 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, path, p);
   2014 	if ((error = vn_open(&nd, FREAD | FWRITE, 0)) != 0) {
   2015 #ifdef DEBUG
   2016 		printf("RAIDframe: vn_open returned %d\n", error);
   2017 #endif
   2018 		return (error);
   2019 	}
   2020 	vp = nd.ni_vp;
   2021 	if (vp->v_usecount > 1) {
   2022 		VOP_UNLOCK(vp, 0);
   2023 		(void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
   2024 		return (EBUSY);
   2025 	}
   2026 	if ((error = VOP_GETATTR(vp, &va, p->p_ucred, p)) != 0) {
   2027 		VOP_UNLOCK(vp, 0);
   2028 		(void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
   2029 		return (error);
   2030 	}
   2031 	/* XXX: eventually we should handle VREG, too. */
   2032 	if (va.va_type != VBLK) {
   2033 		VOP_UNLOCK(vp, 0);
   2034 		(void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
   2035 		return (ENOTBLK);
   2036 	}
   2037 	VOP_UNLOCK(vp, 0);
   2038 	*vpp = vp;
   2039 	return (0);
   2040 }
   2041 /*
   2042  * Wait interruptibly for an exclusive lock.
   2043  *
   2044  * XXX
   2045  * Several drivers do this; it should be abstracted and made MP-safe.
   2046  * (Hmm... where have we seen this warning before :->  GO )
   2047  */
   2048 static int
   2049 raidlock(rs)
   2050 	struct raid_softc *rs;
   2051 {
   2052 	int     error;
   2053 
   2054 	while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
   2055 		rs->sc_flags |= RAIDF_WANTED;
   2056 		if ((error =
   2057 			tsleep(rs, PRIBIO | PCATCH, "raidlck", 0)) != 0)
   2058 			return (error);
   2059 	}
   2060 	rs->sc_flags |= RAIDF_LOCKED;
   2061 	return (0);
   2062 }
   2063 /*
   2064  * Unlock and wake up any waiters.
   2065  */
   2066 static void
   2067 raidunlock(rs)
   2068 	struct raid_softc *rs;
   2069 {
   2070 
   2071 	rs->sc_flags &= ~RAIDF_LOCKED;
   2072 	if ((rs->sc_flags & RAIDF_WANTED) != 0) {
   2073 		rs->sc_flags &= ~RAIDF_WANTED;
   2074 		wakeup(rs);
   2075 	}
   2076 }
   2077 
   2078 
   2079 #define RF_COMPONENT_INFO_OFFSET  16384 /* bytes */
   2080 #define RF_COMPONENT_INFO_SIZE     1024 /* bytes */
   2081 
   2082 int
   2083 raidmarkclean(dev_t dev, struct vnode *b_vp, int mod_counter)
   2084 {
   2085 	RF_ComponentLabel_t clabel;
   2086 	raidread_component_label(dev, b_vp, &clabel);
   2087 	clabel.mod_counter = mod_counter;
   2088 	clabel.clean = RF_RAID_CLEAN;
   2089 	raidwrite_component_label(dev, b_vp, &clabel);
   2090 	return(0);
   2091 }
   2092 
   2093 
   2094 int
   2095 raidmarkdirty(dev_t dev, struct vnode *b_vp, int mod_counter)
   2096 {
   2097 	RF_ComponentLabel_t clabel;
   2098 	raidread_component_label(dev, b_vp, &clabel);
   2099 	clabel.mod_counter = mod_counter;
   2100 	clabel.clean = RF_RAID_DIRTY;
   2101 	raidwrite_component_label(dev, b_vp, &clabel);
   2102 	return(0);
   2103 }
   2104 
   2105 /* ARGSUSED */
   2106 int
   2107 raidread_component_label(dev, b_vp, clabel)
   2108 	dev_t dev;
   2109 	struct vnode *b_vp;
   2110 	RF_ComponentLabel_t *clabel;
   2111 {
   2112 	struct buf *bp;
   2113 	int error;
   2114 
   2115 	/* XXX should probably ensure that we don't try to do this if
   2116 	   someone has changed rf_protected_sectors. */
   2117 
   2118 	/* get a block of the appropriate size... */
   2119 	bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
   2120 	bp->b_dev = dev;
   2121 
   2122 	/* get our ducks in a row for the read */
   2123 	bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
   2124 	bp->b_bcount = RF_COMPONENT_INFO_SIZE;
   2125 	bp->b_flags = B_BUSY | B_READ;
   2126  	bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
   2127 
   2128 	(*bdevsw[major(bp->b_dev)].d_strategy)(bp);
   2129 
   2130 	error = biowait(bp);
   2131 
   2132 	if (!error) {
   2133 		memcpy(clabel, bp->b_un.b_addr,
   2134 		       sizeof(RF_ComponentLabel_t));
   2135 #if 0
   2136 		print_component_label( clabel );
   2137 #endif
   2138         } else {
   2139 #if 0
   2140 		printf("Failed to read RAID component label!\n");
   2141 #endif
   2142 	}
   2143 
   2144         bp->b_flags = B_INVAL | B_AGE;
   2145 	brelse(bp);
   2146 	return(error);
   2147 }
   2148 /* ARGSUSED */
   2149 int
   2150 raidwrite_component_label(dev, b_vp, clabel)
   2151 	dev_t dev;
   2152 	struct vnode *b_vp;
   2153 	RF_ComponentLabel_t *clabel;
   2154 {
   2155 	struct buf *bp;
   2156 	int error;
   2157 
   2158 	/* get a block of the appropriate size... */
   2159 	bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
   2160 	bp->b_dev = dev;
   2161 
   2162 	/* get our ducks in a row for the write */
   2163 	bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
   2164 	bp->b_bcount = RF_COMPONENT_INFO_SIZE;
   2165 	bp->b_flags = B_BUSY | B_WRITE;
   2166  	bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
   2167 
   2168 	memset( bp->b_un.b_addr, 0, RF_COMPONENT_INFO_SIZE );
   2169 
   2170 	memcpy( bp->b_un.b_addr, clabel, sizeof(RF_ComponentLabel_t));
   2171 
   2172 	(*bdevsw[major(bp->b_dev)].d_strategy)(bp);
   2173 	error = biowait(bp);
   2174         bp->b_flags = B_INVAL | B_AGE;
   2175 	brelse(bp);
   2176 	if (error) {
   2177 #if 1
   2178 		printf("Failed to write RAID component info!\n");
   2179 #endif
   2180 	}
   2181 
   2182 	return(error);
   2183 }
   2184 
   2185 void
   2186 rf_markalldirty( raidPtr )
   2187 	RF_Raid_t *raidPtr;
   2188 {
   2189 	RF_ComponentLabel_t clabel;
   2190 	int r,c;
   2191 
   2192 	raidPtr->mod_counter++;
   2193 	for (r = 0; r < raidPtr->numRow; r++) {
   2194 		for (c = 0; c < raidPtr->numCol; c++) {
   2195 			if (raidPtr->Disks[r][c].status != rf_ds_failed) {
   2196 				raidread_component_label(
   2197 					raidPtr->Disks[r][c].dev,
   2198 					raidPtr->raid_cinfo[r][c].ci_vp,
   2199 					&clabel);
   2200 				if (clabel.status == rf_ds_spared) {
   2201 					/* XXX do something special...
   2202 					 but whatever you do, don't
   2203 					 try to access it!! */
   2204 				} else {
   2205 #if 0
   2206 				clabel.status =
   2207 					raidPtr->Disks[r][c].status;
   2208 				raidwrite_component_label(
   2209 					raidPtr->Disks[r][c].dev,
   2210 					raidPtr->raid_cinfo[r][c].ci_vp,
   2211 					&clabel);
   2212 #endif
   2213 				raidmarkdirty(
   2214 				       raidPtr->Disks[r][c].dev,
   2215 				       raidPtr->raid_cinfo[r][c].ci_vp,
   2216 				       raidPtr->mod_counter);
   2217 				}
   2218 			}
   2219 		}
   2220 	}
   2221 	/* printf("Component labels marked dirty.\n"); */
   2222 #if 0
   2223 	for( c = 0; c < raidPtr->numSpare ; c++) {
   2224 		sparecol = raidPtr->numCol + c;
   2225 		if (raidPtr->Disks[r][sparecol].status == rf_ds_used_spare) {
   2226 			/*
   2227 
   2228 			   XXX this is where we get fancy and map this spare
   2229 			   into it's correct spot in the array.
   2230 
   2231 			 */
   2232 			/*
   2233 
   2234 			   we claim this disk is "optimal" if it's
   2235 			   rf_ds_used_spare, as that means it should be
   2236 			   directly substitutable for the disk it replaced.
   2237 			   We note that too...
   2238 
   2239 			 */
   2240 
   2241 			for(i=0;i<raidPtr->numRow;i++) {
   2242 				for(j=0;j<raidPtr->numCol;j++) {
   2243 					if ((raidPtr->Disks[i][j].spareRow ==
   2244 					     r) &&
   2245 					    (raidPtr->Disks[i][j].spareCol ==
   2246 					     sparecol)) {
   2247 						srow = r;
   2248 						scol = sparecol;
   2249 						break;
   2250 					}
   2251 				}
   2252 			}
   2253 
   2254 			raidread_component_label(
   2255 				      raidPtr->Disks[r][sparecol].dev,
   2256 				      raidPtr->raid_cinfo[r][sparecol].ci_vp,
   2257 				      &clabel);
   2258 			/* make sure status is noted */
   2259 			clabel.version = RF_COMPONENT_LABEL_VERSION;
   2260 			clabel.mod_counter = raidPtr->mod_counter;
   2261 			clabel.serial_number = raidPtr->serial_number;
   2262 			clabel.row = srow;
   2263 			clabel.column = scol;
   2264 			clabel.num_rows = raidPtr->numRow;
   2265 			clabel.num_columns = raidPtr->numCol;
   2266 			clabel.clean = RF_RAID_DIRTY; /* changed in a bit*/
   2267 			clabel.status = rf_ds_optimal;
   2268 			raidwrite_component_label(
   2269 				      raidPtr->Disks[r][sparecol].dev,
   2270 				      raidPtr->raid_cinfo[r][sparecol].ci_vp,
   2271 				      &clabel);
   2272 			raidmarkclean( raidPtr->Disks[r][sparecol].dev,
   2273 			              raidPtr->raid_cinfo[r][sparecol].ci_vp);
   2274 		}
   2275 	}
   2276 
   2277 #endif
   2278 }
   2279 
   2280 
   2281 void
   2282 rf_update_component_labels( raidPtr )
   2283 	RF_Raid_t *raidPtr;
   2284 {
   2285 	RF_ComponentLabel_t clabel;
   2286 	int sparecol;
   2287 	int r,c;
   2288 	int i,j;
   2289 	int srow, scol;
   2290 
   2291 	srow = -1;
   2292 	scol = -1;
   2293 
   2294 	/* XXX should do extra checks to make sure things really are clean,
   2295 	   rather than blindly setting the clean bit... */
   2296 
   2297 	raidPtr->mod_counter++;
   2298 
   2299 	for (r = 0; r < raidPtr->numRow; r++) {
   2300 		for (c = 0; c < raidPtr->numCol; c++) {
   2301 			if (raidPtr->Disks[r][c].status == rf_ds_optimal) {
   2302 				raidread_component_label(
   2303 					raidPtr->Disks[r][c].dev,
   2304 					raidPtr->raid_cinfo[r][c].ci_vp,
   2305 					&clabel);
   2306 				/* make sure status is noted */
   2307 				clabel.status = rf_ds_optimal;
   2308 				/* bump the counter */
   2309 				clabel.mod_counter++;
   2310 #if 0
   2311 				/* note where this set was configured last */
   2312 				clabel.last_unit = raidPtr->raidid;
   2313 #endif
   2314 #if DEBUG
   2315 				if (raidPtr->mod_counter !=
   2316 				    clabel.mod_counter) {
   2317 					printf("raid%d: mod_counter for row: %d col: %d not in sync\n", raidPtr->raidid, r, c);
   2318 				}
   2319 #endif
   2320 
   2321 				raidwrite_component_label(
   2322 					raidPtr->Disks[r][c].dev,
   2323 					raidPtr->raid_cinfo[r][c].ci_vp,
   2324 					&clabel);
   2325 				if (raidPtr->parity_good == RF_RAID_CLEAN) {
   2326 					raidmarkclean(
   2327 					      raidPtr->Disks[r][c].dev,
   2328 					      raidPtr->raid_cinfo[r][c].ci_vp,
   2329 					      raidPtr->mod_counter);
   2330 				}
   2331 			}
   2332 			/* else we don't touch it.. */
   2333 #if 0
   2334 			else if (raidPtr->Disks[r][c].status !=
   2335 				   rf_ds_failed) {
   2336 				raidread_component_label(
   2337 					raidPtr->Disks[r][c].dev,
   2338 					raidPtr->raid_cinfo[r][c].ci_vp,
   2339 					&clabel);
   2340 				/* make sure status is noted */
   2341 				clabel.status =
   2342 					raidPtr->Disks[r][c].status;
   2343 				raidwrite_component_label(
   2344 					raidPtr->Disks[r][c].dev,
   2345 					raidPtr->raid_cinfo[r][c].ci_vp,
   2346 					&clabel);
   2347 				if (raidPtr->parity_good == RF_RAID_CLEAN) {
   2348 					raidmarkclean(
   2349 					      raidPtr->Disks[r][c].dev,
   2350 					      raidPtr->raid_cinfo[r][c].ci_vp,
   2351 					      raidPtr->mod_counter);
   2352 				}
   2353 			}
   2354 #endif
   2355 		}
   2356 	}
   2357 
   2358 	for( c = 0; c < raidPtr->numSpare ; c++) {
   2359 		sparecol = raidPtr->numCol + c;
   2360 		if (raidPtr->Disks[0][sparecol].status == rf_ds_used_spare) {
   2361 			/*
   2362 
   2363 			   we claim this disk is "optimal" if it's
   2364 			   rf_ds_used_spare, as that means it should be
   2365 			   directly substitutable for the disk it replaced.
   2366 			   We note that too...
   2367 
   2368 			 */
   2369 
   2370 			for(i=0;i<raidPtr->numRow;i++) {
   2371 				for(j=0;j<raidPtr->numCol;j++) {
   2372 					if ((raidPtr->Disks[i][j].spareRow ==
   2373 					     0) &&
   2374 					    (raidPtr->Disks[i][j].spareCol ==
   2375 					     sparecol)) {
   2376 						srow = i;
   2377 						scol = j;
   2378 						break;
   2379 					}
   2380 				}
   2381 			}
   2382 
   2383 			/* XXX shouldn't *really* need this... */
   2384 			raidread_component_label(
   2385 				      raidPtr->Disks[0][sparecol].dev,
   2386 				      raidPtr->raid_cinfo[0][sparecol].ci_vp,
   2387 				      &clabel);
   2388 			/* make sure status is noted */
   2389 
   2390 			raid_init_component_label(raidPtr, &clabel);
   2391 
   2392 			clabel.mod_counter = raidPtr->mod_counter;
   2393 			clabel.row = srow;
   2394 			clabel.column = scol;
   2395 			clabel.status = rf_ds_optimal;
   2396 
   2397 			raidwrite_component_label(
   2398 				      raidPtr->Disks[0][sparecol].dev,
   2399 				      raidPtr->raid_cinfo[0][sparecol].ci_vp,
   2400 				      &clabel);
   2401 			if (raidPtr->parity_good == RF_RAID_CLEAN) {
   2402 				raidmarkclean( raidPtr->Disks[0][sparecol].dev,
   2403 			              raidPtr->raid_cinfo[0][sparecol].ci_vp,
   2404 					       raidPtr->mod_counter);
   2405 			}
   2406 		}
   2407 	}
   2408 	/* 	printf("Component labels updated\n"); */
   2409 }
   2410 
   2411 void
   2412 rf_ReconThread(req)
   2413 	struct rf_recon_req *req;
   2414 {
   2415 	int     s;
   2416 	RF_Raid_t *raidPtr;
   2417 
   2418 	s = splbio();
   2419 	raidPtr = (RF_Raid_t *) req->raidPtr;
   2420 	raidPtr->recon_in_progress = 1;
   2421 
   2422 	rf_FailDisk((RF_Raid_t *) req->raidPtr, req->row, req->col,
   2423 		    ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));
   2424 
   2425 	/* XXX get rid of this! we don't need it at all.. */
   2426 	RF_Free(req, sizeof(*req));
   2427 
   2428 	raidPtr->recon_in_progress = 0;
   2429 	splx(s);
   2430 
   2431 	/* That's all... */
   2432 	kthread_exit(0);        /* does not return */
   2433 }
   2434 
   2435 void
   2436 rf_RewriteParityThread(raidPtr)
   2437 	RF_Raid_t *raidPtr;
   2438 {
   2439 	int retcode;
   2440 	int s;
   2441 
   2442 	raidPtr->parity_rewrite_in_progress = 1;
   2443 	s = splbio();
   2444 	retcode = rf_RewriteParity(raidPtr);
   2445 	splx(s);
   2446 	if (retcode) {
   2447 		printf("raid%d: Error re-writing parity!\n",raidPtr->raidid);
   2448 	} else {
   2449 		/* set the clean bit!  If we shutdown correctly,
   2450 		   the clean bit on each component label will get
   2451 		   set */
   2452 		raidPtr->parity_good = RF_RAID_CLEAN;
   2453 	}
   2454 	raidPtr->parity_rewrite_in_progress = 0;
   2455 
   2456 	/* That's all... */
   2457 	kthread_exit(0);        /* does not return */
   2458 }
   2459 
   2460 
   2461 void
   2462 rf_CopybackThread(raidPtr)
   2463 	RF_Raid_t *raidPtr;
   2464 {
   2465 	int s;
   2466 
   2467 	raidPtr->copyback_in_progress = 1;
   2468 	s = splbio();
   2469 	rf_CopybackReconstructedData(raidPtr);
   2470 	splx(s);
   2471 	raidPtr->copyback_in_progress = 0;
   2472 
   2473 	/* That's all... */
   2474 	kthread_exit(0);        /* does not return */
   2475 }
   2476 
   2477 
   2478 void
   2479 rf_ReconstructInPlaceThread(req)
   2480 	struct rf_recon_req *req;
   2481 {
   2482 	int retcode;
   2483 	int s;
   2484 	RF_Raid_t *raidPtr;
   2485 
   2486 	s = splbio();
   2487 	raidPtr = req->raidPtr;
   2488 	raidPtr->recon_in_progress = 1;
   2489 	retcode = rf_ReconstructInPlace(raidPtr, req->row, req->col);
   2490 	RF_Free(req, sizeof(*req));
   2491 	raidPtr->recon_in_progress = 0;
   2492 	splx(s);
   2493 
   2494 	/* That's all... */
   2495 	kthread_exit(0);        /* does not return */
   2496 }
   2497 
   2498 void
   2499 rf_mountroot_hook(dev)
   2500 	struct device *dev;
   2501 {
   2502 #if 1
   2503 	printf("rf_mountroot_hook called for %s\n",dev->dv_xname);
   2504 #endif
   2505 	if (boothowto & RB_ASKNAME) {
   2506 		/* We don't auto-config... */
   2507 	} else {
   2508 		/* They didn't ask, and we found something bootable... */
   2509 		/* XXX pretend for now.. */
   2510 if (raidautoconfig) {
   2511 		rootspec = raid_rooty;
   2512 }
   2513 	}
   2514 }
   2515 
   2516 
   2517 RF_AutoConfig_t *
   2518 rf_find_raid_components()
   2519 {
   2520 	struct devnametobdevmaj *dtobdm;
   2521 	struct vnode *vp;
   2522 	struct disklabel label;
   2523 	struct device *dv;
   2524 	char *cd_name;
   2525 	dev_t dev;
   2526 	int error;
   2527 	int i;
   2528 	int good_one;
   2529 	RF_ComponentLabel_t *clabel;
   2530 	RF_AutoConfig_t *ac_list;
   2531 	RF_AutoConfig_t *ac;
   2532 
   2533 
   2534 	/* initialize the AutoConfig list */
   2535 	ac_list = NULL;
   2536 
   2537 if (raidautoconfig) {
   2538 
   2539 	/* we begin by trolling through *all* the devices on the system */
   2540 
   2541 	for (dv = alldevs.tqh_first; dv != NULL;
   2542 	     dv = dv->dv_list.tqe_next) {
   2543 
   2544 		/* we are only interested in disks... */
   2545 		if (dv->dv_class != DV_DISK)
   2546 			continue;
   2547 
   2548 		/* we don't care about floppies... */
   2549 		if (!strcmp(dv->dv_cfdata->cf_driver->cd_name,"fd")) {
   2550 			continue;
   2551 		}
   2552 
   2553 		/* need to find the device_name_to_block_device_major stuff */
   2554 		cd_name = dv->dv_cfdata->cf_driver->cd_name;
   2555 		dtobdm = dev_name2blk;
   2556 		while (dtobdm->d_name && strcmp(dtobdm->d_name, cd_name)) {
   2557 			dtobdm++;
   2558 		}
   2559 
   2560 		/* get a vnode for the raw partition of this disk */
   2561 
   2562 		dev = MAKEDISKDEV(dtobdm->d_maj, dv->dv_unit, RAW_PART);
   2563 		if (bdevvp(dev, &vp))
   2564 			panic("RAID can't alloc vnode");
   2565 
   2566 		error = VOP_OPEN(vp, FREAD, NOCRED, 0);
   2567 
   2568 		if (error) {
   2569 			/* "Who cares."  Continue looking
   2570 			   for something that exists*/
   2571 			vput(vp);
   2572 			continue;
   2573 		}
   2574 
   2575 		/* Ok, the disk exists.  Go get the disklabel. */
   2576 		error = VOP_IOCTL(vp, DIOCGDINFO, (caddr_t)&label,
   2577 				  FREAD, NOCRED, 0);
   2578 		if (error) {
   2579 			/*
   2580 			 * XXX can't happen - open() would
   2581 			 * have errored out (or faked up one)
   2582 			 */
   2583 			printf("can't get label for dev %s%c (%d)!?!?\n",
   2584 			       dv->dv_xname, 'a' + RAW_PART, error);
   2585 		}
   2586 
   2587 		/* don't need this any more.  We'll allocate it again
   2588 		   a little later if we really do... */
   2589 		VOP_CLOSE(vp, FREAD, NOCRED, 0);
   2590 		vput(vp);
   2591 
   2592 		for (i=0; i < label.d_npartitions; i++) {
   2593 			/* We only support partitions marked as RAID */
   2594 			if (label.d_partitions[i].p_fstype != FS_RAID)
   2595 				continue;
   2596 
   2597 			dev = MAKEDISKDEV(dtobdm->d_maj, dv->dv_unit, i);
   2598 			if (bdevvp(dev, &vp))
   2599 				panic("RAID can't alloc vnode");
   2600 
   2601 			error = VOP_OPEN(vp, FREAD, NOCRED, 0);
   2602 			if (error) {
   2603 				/* Whatever... */
   2604 				vput(vp);
   2605 				continue;
   2606 			}
   2607 
   2608 			good_one = 0;
   2609 
   2610 			clabel = (RF_ComponentLabel_t *)
   2611 				malloc(sizeof(RF_ComponentLabel_t),
   2612 				       M_RAIDFRAME, M_NOWAIT);
   2613 			if (clabel == NULL) {
   2614 				/* XXX CLEANUP HERE */
   2615 				printf("RAID auto config: out of memory!\n");
   2616 				return(NULL); /* XXX probably should panic? */
   2617 			}
   2618 
   2619 			if (!raidread_component_label(dev, vp, clabel)) {
   2620 				/* Got the label.  Does it look reasonable? */
   2621 				if (rf_reasonable_label(clabel) &&
   2622 				    (clabel->partitionSize <=
   2623 				     label.d_partitions[i].p_size)) {
   2624 #if DEBUG
   2625 					printf("Component on: %s%c: %d\n",
   2626 					       dv->dv_xname, 'a'+i,
   2627 					       label.d_partitions[i].p_size);
   2628 					print_component_label(clabel);
   2629 #endif
   2630 					/* if it's reasonable, add it,
   2631 					   else ignore it. */
   2632 					ac = (RF_AutoConfig_t *)
   2633 						malloc(sizeof(RF_AutoConfig_t),
   2634 						       M_RAIDFRAME,
   2635 						       M_NOWAIT);
   2636 					if (ac == NULL) {
   2637 						/* XXX should panic?? */
   2638 						return(NULL);
   2639 					}
   2640 
   2641 					sprintf(ac->devname, "%s%c",
   2642 						dv->dv_xname, 'a'+i);
   2643 					ac->dev = dev;
   2644 					ac->vp = vp;
   2645 					ac->clabel = clabel;
   2646 					ac->next = ac_list;
   2647 					ac_list = ac;
   2648 					good_one = 1;
   2649 				}
   2650 			}
   2651 			if (!good_one) {
   2652 				/* cleanup */
   2653 				free(clabel, M_RAIDFRAME);
   2654 				VOP_CLOSE(vp, FREAD, NOCRED, 0);
   2655 				vput(vp);
   2656 			}
   2657 		}
   2658 	}
   2659 }
   2660 return(ac_list);
   2661 }
   2662 
   2663 static int
   2664 rf_reasonable_label(clabel)
   2665 	RF_ComponentLabel_t *clabel;
   2666 {
   2667 
   2668 	if (((clabel->version==RF_COMPONENT_LABEL_VERSION_1) ||
   2669 	     (clabel->version==RF_COMPONENT_LABEL_VERSION)) &&
   2670 	    ((clabel->clean == RF_RAID_CLEAN) ||
   2671 	     (clabel->clean == RF_RAID_DIRTY)) &&
   2672 	    clabel->row >=0 &&
   2673 	    clabel->column >= 0 &&
   2674 	    clabel->num_rows > 0 &&
   2675 	    clabel->num_columns > 0 &&
   2676 	    clabel->row < clabel->num_rows &&
   2677 	    clabel->column < clabel->num_columns &&
   2678 	    clabel->blockSize > 0 &&
   2679 	    clabel->numBlocks > 0) {
   2680 		/* label looks reasonable enough... */
   2681 		return(1);
   2682 	}
   2683 	return(0);
   2684 }
   2685 
   2686 
   2687 void
   2688 print_component_label(clabel)
   2689 	RF_ComponentLabel_t *clabel;
   2690 {
   2691 	printf("   Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
   2692 	       clabel->row, clabel->column,
   2693 	       clabel->num_rows, clabel->num_columns);
   2694 	printf("   Version: %d Serial Number: %d Mod Counter: %d\n",
   2695 	       clabel->version, clabel->serial_number,
   2696 	       clabel->mod_counter);
   2697 	printf("   Clean: %s Status: %d\n",
   2698 	       clabel->clean ? "Yes" : "No", clabel->status );
   2699 	printf("   sectPerSU: %d SUsPerPU: %d SUsPerRU: %d\n",
   2700 	       clabel->sectPerSU, clabel->SUsPerPU, clabel->SUsPerRU);
   2701 	printf("   RAID Level: %c  blocksize: %d numBlocks: %d\n",
   2702 	       (char) clabel->parityConfig, clabel->blockSize,
   2703 	       clabel->numBlocks);
   2704 	printf("   Autoconfig: %s\n", clabel->autoconfigure ? "Yes" : "No" );
   2705 	printf("   Last configured as: raid%d\n", clabel->last_unit );
   2706 #if 0
   2707 	   printf("   Config order: %d\n", clabel->config_order);
   2708 #endif
   2709 
   2710 }
   2711 
   2712 RF_ConfigSet_t *
   2713 rf_create_auto_sets(ac_list)
   2714 	RF_AutoConfig_t *ac_list;
   2715 {
   2716 	RF_AutoConfig_t *ac;
   2717 	RF_ConfigSet_t *config_sets;
   2718 	RF_ConfigSet_t *cset;
   2719 	RF_AutoConfig_t *ac_next;
   2720 
   2721 
   2722 	config_sets = NULL;
   2723 
   2724 	/* Go through the AutoConfig list, and figure out which components
   2725 	   belong to what sets.  */
   2726 	ac = ac_list;
   2727 	while(ac!=NULL) {
   2728 		/* we're going to putz with ac->next, so save it here
   2729 		   for use at the end of the loop */
   2730 		ac_next = ac->next;
   2731 
   2732 		if (config_sets == NULL) {
   2733 			/* will need at least this one... */
   2734 			config_sets = (RF_ConfigSet_t *)
   2735 				malloc(sizeof(RF_ConfigSet_t),
   2736 				       M_RAIDFRAME, M_NOWAIT);
   2737 			if (config_sets == NULL) {
   2738 				panic("rf_create_auto_sets: No memory!\n");
   2739 			}
   2740 			/* this one is easy :) */
   2741 			config_sets->ac = ac;
   2742 			config_sets->next = NULL;
   2743 			config_sets->rootable = 0;
   2744 			ac->next = NULL;
   2745 		} else {
   2746 			/* which set does this component fit into? */
   2747 			cset = config_sets;
   2748 			while(cset!=NULL) {
   2749 				if (rf_does_it_fit(cset, ac)) {
   2750 					/* looks like it matches */
   2751 					ac->next = cset->ac;
   2752 					cset->ac = ac;
   2753 					break;
   2754 				}
   2755 				cset = cset->next;
   2756 			}
   2757 			if (cset==NULL) {
   2758 				/* didn't find a match above... new set..*/
   2759 				cset = (RF_ConfigSet_t *)
   2760 					malloc(sizeof(RF_ConfigSet_t),
   2761 					       M_RAIDFRAME, M_NOWAIT);
   2762 				if (cset == NULL) {
   2763 					panic("rf_create_auto_sets: No memory!\n");
   2764 				}
   2765 				cset->ac = ac;
   2766 				ac->next = NULL;
   2767 				cset->next = config_sets;
   2768 				cset->rootable = 0;
   2769 				config_sets = cset;
   2770 			}
   2771 		}
   2772 		ac = ac_next;
   2773 	}
   2774 
   2775 
   2776 	return(config_sets);
   2777 }
   2778 
   2779 static int
   2780 rf_does_it_fit(cset, ac)
   2781 	RF_ConfigSet_t *cset;
   2782 	RF_AutoConfig_t *ac;
   2783 {
   2784 	RF_ComponentLabel_t *clabel1, *clabel2;
   2785 
   2786 	/* If this one matches the *first* one in the set, that's good
   2787 	   enough, since the other members of the set would have been
   2788 	   through here too... */
   2789 	/* note that we are not checking partitionSize here.. */
   2790 
   2791 	clabel1 = cset->ac->clabel;
   2792 	clabel2 = ac->clabel;
   2793 	if ((clabel1->version == clabel2->version) &&
   2794 	    (clabel1->serial_number == clabel2->serial_number) &&
   2795 	    (clabel1->mod_counter == clabel2->mod_counter) &&
   2796 	    (clabel1->num_rows == clabel2->num_rows) &&
   2797 	    (clabel1->num_columns == clabel2->num_columns) &&
   2798 	    (clabel1->sectPerSU == clabel2->sectPerSU) &&
   2799 	    (clabel1->SUsPerPU == clabel2->SUsPerPU) &&
   2800 	    (clabel1->SUsPerRU == clabel2->SUsPerRU) &&
   2801 	    (clabel1->parityConfig == clabel2->parityConfig) &&
   2802 	    (clabel1->maxOutstanding == clabel2->maxOutstanding) &&
   2803 	    (clabel1->blockSize == clabel2->blockSize) &&
   2804 	    (clabel1->numBlocks == clabel2->numBlocks) &&
   2805 	    (clabel1->autoconfigure == clabel2->autoconfigure) &&
   2806 	    (clabel1->root_partition == clabel2->root_partition) &&
   2807 	    (clabel1->last_unit == clabel2->last_unit) &&
   2808 	    (clabel1->config_order == clabel2->config_order)) {
   2809 		/* if it get's here, it almost *has* to be a match */
   2810 	} else {
   2811 		/* it's not consistent with somebody in the set..
   2812 		   punt */
   2813 		return(0);
   2814 	}
   2815 	/* all was fine.. it must fit... */
   2816 	return(1);
   2817 }
   2818 
   2819 int
   2820 rf_have_enough_components(cset)
   2821 	RF_ConfigSet_t *cset;
   2822 {
   2823 	RF_AutoConfig_t *ac;
   2824 	RF_AutoConfig_t *auto_config;
   2825 	RF_ComponentLabel_t *clabel;
   2826 	int r,c;
   2827 	int num_rows;
   2828 	int num_cols;
   2829 	int num_missing;
   2830 
   2831 	/* check to see that we have enough 'live' components
   2832 	   of this set.  If so, we can configure it if necessary */
   2833 
   2834 	num_rows = cset->ac->clabel->num_rows;
   2835 	num_cols = cset->ac->clabel->num_columns;
   2836 
   2837 	/* XXX Check for duplicate components!?!?!? */
   2838 
   2839 	num_missing = 0;
   2840 	auto_config = cset->ac;
   2841 
   2842 	for(r=0; r<num_rows; r++) {
   2843 		for(c=0; c<num_cols; c++) {
   2844 			ac = auto_config;
   2845 			while(ac!=NULL) {
   2846 				if (ac->clabel==NULL) {
   2847 					/* big-time bad news. */
   2848 					goto fail;
   2849 				}
   2850 				if ((ac->clabel->row == r) &&
   2851 				    (ac->clabel->column == c)) {
   2852 					/* it's this one... */
   2853 #if DEBUG
   2854 					printf("Found: %s at %d,%d\n",
   2855 					       ac->devname,r,c);
   2856 #endif
   2857 					break;
   2858 				}
   2859 				ac=ac->next;
   2860 			}
   2861 			if (ac==NULL) {
   2862 				/* Didn't find one here! */
   2863 				num_missing++;
   2864 			}
   2865 		}
   2866 	}
   2867 
   2868 	clabel = cset->ac->clabel;
   2869 
   2870 	if (((clabel->parityConfig == '0') && (num_missing > 0)) ||
   2871 	    ((clabel->parityConfig == '1') && (num_missing > 1)) ||
   2872 	    ((clabel->parityConfig == '4') && (num_missing > 1)) ||
   2873 	    ((clabel->parityConfig == '5') && (num_missing > 1))) {
   2874 		/* XXX this needs to be made *much* more general */
   2875 		/* Too many failures */
   2876 		return(0);
   2877 	}
   2878 	/* otherwise, all is well, and we've got enough to take a kick
   2879 	   at autoconfiguring this set */
   2880 	return(1);
   2881 fail:
   2882 	return(0);
   2883 
   2884 }
   2885 
   2886 void
   2887 rf_create_configuration(ac,config,raidPtr)
   2888 	RF_AutoConfig_t *ac;
   2889 	RF_Config_t *config;
   2890 	RF_Raid_t *raidPtr;
   2891 {
   2892 	RF_ComponentLabel_t *clabel;
   2893 
   2894 	clabel = ac->clabel;
   2895 
   2896 	/* 1. Fill in the common stuff */
   2897 	config->numRow = clabel->num_rows;
   2898 	config->numCol = clabel->num_columns;
   2899 	config->numSpare = 0; /* XXX should this be set here? */
   2900 	config->sectPerSU = clabel->sectPerSU;
   2901 	config->SUsPerPU = clabel->SUsPerPU;
   2902 	config->SUsPerRU = clabel->SUsPerRU;
   2903 	config->parityConfig = clabel->parityConfig;
   2904 	/* XXX... */
   2905 	strcpy(config->diskQueueType,"fifo");
   2906 	config->maxOutstandingDiskReqs = clabel->maxOutstanding;
   2907 	config->layoutSpecificSize = 0; /* XXX ?? */
   2908 
   2909 	while(ac!=NULL) {
   2910 		/* row/col values will be in range due to the checks
   2911 		   in reasonable_label() */
   2912 		strcpy(config->devnames[ac->clabel->row][ac->clabel->column],
   2913 		       ac->devname);
   2914 		ac = ac->next;
   2915 	}
   2916 
   2917 }
   2918 
   2919 int
   2920 rf_set_autoconfig(raidPtr, new_value)
   2921 	RF_Raid_t *raidPtr;
   2922 	int new_value;
   2923 {
   2924 	RF_ComponentLabel_t clabel;
   2925 	struct vnode *vp;
   2926 	dev_t dev;
   2927 	int row, column;
   2928 
   2929 	raidPtr->autoconfigure = new_value;
   2930 	for(row=0; row<raidPtr->numRow; row++) {
   2931 		for(column=0; column<raidPtr->numCol; column++) {
   2932 			dev = raidPtr->Disks[row][column].dev;
   2933 			vp = raidPtr->raid_cinfo[row][column].ci_vp;
   2934 			raidread_component_label(dev, vp, &clabel);
   2935 			clabel.autoconfigure = new_value;
   2936 			raidwrite_component_label(dev, vp, &clabel);
   2937 		}
   2938 	}
   2939 	return(new_value);
   2940 }
   2941 
   2942 int
   2943 rf_set_rootpartition(raidPtr, new_value)
   2944 	RF_Raid_t *raidPtr;
   2945 	int new_value;
   2946 {
   2947 	RF_ComponentLabel_t clabel;
   2948 	struct vnode *vp;
   2949 	dev_t dev;
   2950 	int row, column;
   2951 
   2952 	raidPtr->root_partition = new_value;
   2953 	for(row=0; row<raidPtr->numRow; row++) {
   2954 		for(column=0; column<raidPtr->numCol; column++) {
   2955 			dev = raidPtr->Disks[row][column].dev;
   2956 			vp = raidPtr->raid_cinfo[row][column].ci_vp;
   2957 			raidread_component_label(dev, vp, &clabel);
   2958 			clabel.root_partition = new_value;
   2959 			raidwrite_component_label(dev, vp, &clabel);
   2960 		}
   2961 	}
   2962 	return(new_value);
   2963 }
   2964 
   2965 void
   2966 rf_release_all_vps(cset)
   2967 	RF_ConfigSet_t *cset;
   2968 {
   2969 	RF_AutoConfig_t *ac;
   2970 
   2971 	ac = cset->ac;
   2972 	while(ac!=NULL) {
   2973 		/* Close the vp, and give it back */
   2974 		if (ac->vp) {
   2975 			VOP_CLOSE(ac->vp, FREAD, NOCRED, 0);
   2976 			vput(ac->vp);
   2977 		}
   2978 		ac = ac->next;
   2979 	}
   2980 }
   2981 
   2982 
   2983 void
   2984 rf_cleanup_config_set(cset)
   2985 	RF_ConfigSet_t *cset;
   2986 {
   2987 	RF_AutoConfig_t *ac;
   2988 	RF_AutoConfig_t *next_ac;
   2989 
   2990 	ac = cset->ac;
   2991 	while(ac!=NULL) {
   2992 		next_ac = ac->next;
   2993 		/* nuke the label */
   2994 		free(ac->clabel, M_RAIDFRAME);
   2995 		/* cleanup the config structure */
   2996 		free(ac, M_RAIDFRAME);
   2997 		/* "next.." */
   2998 		ac = next_ac;
   2999 	}
   3000 	/* and, finally, nuke the config set */
   3001 	free(cset, M_RAIDFRAME);
   3002 }
   3003 
   3004 
   3005 void
   3006 raid_init_component_label(raidPtr, clabel)
   3007 	RF_Raid_t *raidPtr;
   3008 	RF_ComponentLabel_t *clabel;
   3009 {
   3010 	/* current version number */
   3011 	clabel->version = RF_COMPONENT_LABEL_VERSION;
   3012 	clabel->serial_number = raidPtr->serial_number;
   3013 	clabel->mod_counter = raidPtr->mod_counter;
   3014 	clabel->num_rows = raidPtr->numRow;
   3015 	clabel->num_columns = raidPtr->numCol;
   3016 	clabel->clean = RF_RAID_DIRTY; /* not clean */
   3017 	clabel->status = rf_ds_optimal; /* "It's good!" */
   3018 
   3019 	clabel->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit;
   3020 	clabel->SUsPerPU = raidPtr->Layout.SUsPerPU;
   3021 	clabel->SUsPerRU = raidPtr->Layout.SUsPerRU;
   3022 
   3023 	clabel->blockSize = raidPtr->bytesPerSector;
   3024 	clabel->numBlocks = raidPtr->sectorsPerDisk;
   3025 
   3026 	/* XXX not portable */
   3027 	clabel->parityConfig = raidPtr->Layout.map->parityConfig;
   3028 	clabel->maxOutstanding = raidPtr->maxOutstanding;
   3029 	clabel->autoconfigure = raidPtr->autoconfigure;
   3030 	clabel->root_partition = raidPtr->root_partition;
   3031 	clabel->last_unit = raidPtr->raidid;
   3032 	clabel->config_order = raidPtr->config_order;
   3033 }
   3034 
   3035 int
   3036 rf_auto_config_set(cset,unit)
   3037 	RF_ConfigSet_t *cset;
   3038 	int *unit;
   3039 {
   3040 	RF_Raid_t *raidPtr;
   3041 	RF_Config_t *config;
   3042 	int raidID;
   3043 	int retcode;
   3044 
   3045 	printf("Starting autoconfigure on raid%d\n",raidID);
   3046 
   3047 	retcode = 0;
   3048 	*unit = -1;
   3049 
   3050 	/* 1. Create a config structure */
   3051 
   3052 	config = (RF_Config_t *)malloc(sizeof(RF_Config_t),
   3053 				       M_RAIDFRAME,
   3054 				       M_NOWAIT);
   3055 	if (config==NULL) {
   3056 		printf("Out of mem!?!?\n");
   3057 				/* XXX do something more intelligent here. */
   3058 		return(1);
   3059 	}
   3060 	/* XXX raidID needs to be set correctly.. */
   3061 
   3062 	/*
   3063 	   2. Figure out what RAID ID this one is supposed to live at
   3064 	   See if we can get the same RAID dev that it was configured
   3065 	   on last time..
   3066 	*/
   3067 
   3068 	raidID = cset->ac->clabel->last_unit;
   3069 	if ((raidID < 0) || (raidID >= numraid)) {
   3070 		/* let's not wander off into lala land. */
   3071 		raidID = numraid - 1;
   3072 	}
   3073 	if (raidPtrs[raidID]->valid != 0) {
   3074 
   3075 		/*
   3076 		   Nope... Go looking for an alternative...
   3077 		   Start high so we don't immediately use raid0 if that's
   3078 		   not taken.
   3079 		*/
   3080 
   3081 		for(raidID = numraid; raidID >= 0; raidID--) {
   3082 			if (raidPtrs[raidID]->valid == 0) {
   3083 				/* can use this one! */
   3084 				break;
   3085 			}
   3086 		}
   3087 	}
   3088 
   3089 	if (raidID < 0) {
   3090 		/* punt... */
   3091 		printf("Unable to auto configure this set!\n");
   3092 		printf("(Out of RAID devs!)\n");
   3093 		return(1);
   3094 	}
   3095 
   3096 	raidPtr = raidPtrs[raidID];
   3097 
   3098 	/* XXX all this stuff should be done SOMEWHERE ELSE! */
   3099 	raidPtr->raidid = raidID;
   3100 	raidPtr->openings = RAIDOUTSTANDING;
   3101 
   3102 	/* 3. Build the configuration structure */
   3103 	rf_create_configuration(cset->ac, config, raidPtr);
   3104 
   3105 	/* 4. Do the configuration */
   3106 	retcode = rf_Configure(raidPtr, config, cset->ac);
   3107 
   3108 	if (retcode == 0) {
   3109 #if DEBUG
   3110 		printf("Calling raidinit()\n");
   3111 #endif
   3112 				/* XXX the 0 below is bogus! */
   3113 		retcode = raidinit(0, raidPtrs[raidID], raidID);
   3114 		if (retcode) {
   3115 			printf("init returned: %d\n",retcode);
   3116 		}
   3117 		rf_markalldirty( raidPtrs[raidID] );
   3118 		raidPtrs[raidID]->autoconfigure = 1; /* XXX do this here? */
   3119 		if (cset->ac->clabel->root_partition==1) {
   3120 			/* everything configured just fine.  Make a note
   3121 			   that this set is eligible to be root. */
   3122 			cset->rootable = 1;
   3123 			/* XXX do this here? */
   3124 			raidPtrs[raidID]->root_partition = 1;
   3125 		}
   3126 	}
   3127 
   3128 	/* 5. Cleanup */
   3129 	free(config, M_RAIDFRAME);
   3130 
   3131 	*unit = raidID;
   3132 	return(retcode);
   3133 }
   3134