Home | History | Annotate | Line # | Download | only in raidframe
rf_netbsdkintf.c revision 1.54
      1 /*	$NetBSD: rf_netbsdkintf.c,v 1.54 2000/02/23 02:04:21 oster Exp $	*/
      2 /*-
      3  * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc.
      4  * All rights reserved.
      5  *
      6  * This code is derived from software contributed to The NetBSD Foundation
      7  * by Greg Oster; Jason R. Thorpe.
      8  *
      9  * Redistribution and use in source and binary forms, with or without
     10  * modification, are permitted provided that the following conditions
     11  * are met:
     12  * 1. Redistributions of source code must retain the above copyright
     13  *    notice, this list of conditions and the following disclaimer.
     14  * 2. Redistributions in binary form must reproduce the above copyright
     15  *    notice, this list of conditions and the following disclaimer in the
     16  *    documentation and/or other materials provided with the distribution.
     17  * 3. All advertising materials mentioning features or use of this software
     18  *    must display the following acknowledgement:
     19  *        This product includes software developed by the NetBSD
     20  *        Foundation, Inc. and its contributors.
     21  * 4. Neither the name of The NetBSD Foundation nor the names of its
     22  *    contributors may be used to endorse or promote products derived
     23  *    from this software without specific prior written permission.
     24  *
     25  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     26  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     27  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     28  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     29  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     30  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     31  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     32  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     33  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     34  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     35  * POSSIBILITY OF SUCH DAMAGE.
     36  */
     37 
     38 /*
     39  * Copyright (c) 1988 University of Utah.
     40  * Copyright (c) 1990, 1993
     41  *      The Regents of the University of California.  All rights reserved.
     42  *
     43  * This code is derived from software contributed to Berkeley by
     44  * the Systems Programming Group of the University of Utah Computer
     45  * Science Department.
     46  *
     47  * Redistribution and use in source and binary forms, with or without
     48  * modification, are permitted provided that the following conditions
     49  * are met:
     50  * 1. Redistributions of source code must retain the above copyright
     51  *    notice, this list of conditions and the following disclaimer.
     52  * 2. Redistributions in binary form must reproduce the above copyright
     53  *    notice, this list of conditions and the following disclaimer in the
     54  *    documentation and/or other materials provided with the distribution.
     55  * 3. All advertising materials mentioning features or use of this software
     56  *    must display the following acknowledgement:
     57  *      This product includes software developed by the University of
     58  *      California, Berkeley and its contributors.
     59  * 4. Neither the name of the University nor the names of its contributors
     60  *    may be used to endorse or promote products derived from this software
     61  *    without specific prior written permission.
     62  *
     63  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     64  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     65  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     66  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     67  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     68  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     69  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     70  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     71  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     72  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     73  * SUCH DAMAGE.
     74  *
     75  * from: Utah $Hdr: cd.c 1.6 90/11/28$
     76  *
     77  *      @(#)cd.c        8.2 (Berkeley) 11/16/93
     78  */
     79 
     80 
     81 
     82 
     83 /*
     84  * Copyright (c) 1995 Carnegie-Mellon University.
     85  * All rights reserved.
     86  *
     87  * Authors: Mark Holland, Jim Zelenka
     88  *
     89  * Permission to use, copy, modify and distribute this software and
     90  * its documentation is hereby granted, provided that both the copyright
     91  * notice and this permission notice appear in all copies of the
     92  * software, derivative works or modified versions, and any portions
     93  * thereof, and that both notices appear in supporting documentation.
     94  *
     95  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
     96  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
     97  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
     98  *
     99  * Carnegie Mellon requests users of this software to return to
    100  *
    101  *  Software Distribution Coordinator  or  Software.Distribution (at) CS.CMU.EDU
    102  *  School of Computer Science
    103  *  Carnegie Mellon University
    104  *  Pittsburgh PA 15213-3890
    105  *
    106  * any improvements or extensions that they make and grant Carnegie the
    107  * rights to redistribute these changes.
    108  */
    109 
    110 /***********************************************************
    111  *
    112  * rf_kintf.c -- the kernel interface routines for RAIDframe
    113  *
    114  ***********************************************************/
    115 
    116 #include <sys/errno.h>
    117 #include <sys/param.h>
    118 #include <sys/pool.h>
    119 #include <sys/queue.h>
    120 #include <sys/disk.h>
    121 #include <sys/device.h>
    122 #include <sys/stat.h>
    123 #include <sys/ioctl.h>
    124 #include <sys/fcntl.h>
    125 #include <sys/systm.h>
    126 #include <sys/namei.h>
    127 #include <sys/vnode.h>
    128 #include <sys/param.h>
    129 #include <sys/types.h>
    130 #include <machine/types.h>
    131 #include <sys/disklabel.h>
    132 #include <sys/conf.h>
    133 #include <sys/lock.h>
    134 #include <sys/buf.h>
    135 #include <sys/user.h>
    136 
    137 #include "raid.h"
    138 #include "rf_raid.h"
    139 #include "rf_raidframe.h"
    140 #include "rf_copyback.h"
    141 #include "rf_dag.h"
    142 #include "rf_dagflags.h"
    143 #include "rf_diskqueue.h"
    144 #include "rf_acctrace.h"
    145 #include "rf_etimer.h"
    146 #include "rf_general.h"
    147 #include "rf_debugMem.h"
    148 #include "rf_kintf.h"
    149 #include "rf_options.h"
    150 #include "rf_driver.h"
    151 #include "rf_parityscan.h"
    152 #include "rf_debugprint.h"
    153 #include "rf_threadstuff.h"
    154 
int     rf_kdebug_level = 0;

/*
 * db1_printf((fmt, args...)) -- debug printf for this file.
 *
 * The argument is a complete, parenthesized printf argument list.  When
 * DEBUG is defined the message is printed only if rf_kdebug_level > 0;
 * otherwise the macro expands to nothing useful.
 *
 * Both forms are wrapped in do { } while (0) so that the macro behaves as
 * a single statement: it needs a trailing semicolon and can be used safely
 * as the body of an unbraced if/else without capturing the else.
 */
#ifdef DEBUG
#define db1_printf(a) do { if (rf_kdebug_level > 0) printf a; } while (0)
#else				/* DEBUG */
#define db1_printf(a) do { } while (0)
#endif				/* DEBUG */
    162 
    163 static RF_Raid_t **raidPtrs;	/* global raid device descriptors */
    164 
    165 RF_DECLARE_STATIC_MUTEX(rf_sparet_wait_mutex)
    166 
    167 static RF_SparetWait_t *rf_sparet_wait_queue;	/* requests to install a
    168 						 * spare table */
    169 static RF_SparetWait_t *rf_sparet_resp_queue;	/* responses from
    170 						 * installation process */
    171 
    172 /* prototypes */
    173 static void KernelWakeupFunc(struct buf * bp);
    174 static void InitBP(struct buf * bp, struct vnode *, unsigned rw_flag,
    175 		   dev_t dev, RF_SectorNum_t startSect,
    176 		   RF_SectorCount_t numSect, caddr_t buf,
    177 		   void (*cbFunc) (struct buf *), void *cbArg,
    178 		   int logBytesPerSector, struct proc * b_proc);
    179 static int raidinit __P((dev_t, RF_Raid_t *, int));
    180 
    181 void raidattach __P((int));
    182 int raidsize __P((dev_t));
    183 int raidopen __P((dev_t, int, int, struct proc *));
    184 int raidclose __P((dev_t, int, int, struct proc *));
    185 int raidioctl __P((dev_t, u_long, caddr_t, int, struct proc *));
    186 int raidwrite __P((dev_t, struct uio *, int));
    187 int raidread __P((dev_t, struct uio *, int));
    188 void raidstrategy __P((struct buf *));
    189 int raiddump __P((dev_t, daddr_t, caddr_t, size_t));
    190 
    191 /*
    192  * Per-request wrapper around an I/O buf.  Pilfered from ccd.c
    193  */
    194 
    195 struct raidbuf {
    196 	struct buf rf_buf;	/* new I/O buf.  MUST BE FIRST!!! (presumably so a struct buf * can be cast back to the raidbuf -- confirm) */
    197 	struct buf *rf_obp;	/* ptr. to original I/O buf */
    198 	int     rf_flags;	/* misc. flags */
    199 	RF_DiskQueueData_t *req;/* the disk queue request that this buf was part of */
    200 };
    201 
    202 
    203 #define RAIDGETBUF(rs) pool_get(&(rs)->sc_cbufpool, PR_NOWAIT)
    204 #define	RAIDPUTBUF(rs, cbp) pool_put(&(rs)->sc_cbufpool, cbp)
    205 
    206 /* XXX Not sure if the following should be replacing the raidPtrs above,
    207    or if it should be used in conjunction with that...
    208    Note: Don't use sc_dev until the raidinit(0,_,_) call in
    209    rf_auto_config_set() actually passes in a real dev_t!  */
    210 
    211 struct raid_softc {	/* driver state for one raid unit; the raid_softc[] array is indexed by unit number */
    212 	int     sc_flags;	/* RAIDF_* flags (see the sc_flags defines below) */
    213 	int     sc_cflags;	/* configuration flags */
    214 	size_t  sc_size;        /* size of the raid device */
    215 	dev_t   sc_dev;	        /* our device.. (see the XXX note above before relying on it) */
    216 	char    sc_xname[20];	/* XXX external name */
    217 	struct disk sc_dkdev;	/* generic disk device info (disklabel, open masks) */
    218 	struct pool sc_cbufpool;	/* component buffer pool, used by RAIDGETBUF/RAIDPUTBUF */
    219 	struct buf_queue buf_queue;	/* device queue: raidstrategy() appends incoming bufs here */
    220 };
    221 /* sc_flags */
    222 #define RAIDF_INITED	0x01	/* unit has been initialized */
    223 #define RAIDF_WLABEL	0x02	/* label area is writable */
    224 #define RAIDF_LABELLING	0x04	/* unit is currently being labelled */
    225 #define RAIDF_WANTED	0x40	/* someone is waiting to obtain a lock */
    226 #define RAIDF_LOCKED	0x80	/* unit is locked */
    227 
    228 #define	raidunit(x)	DISKUNIT(x)
    229 int numraid = 0;
    230 
    231 /*
    232  * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
    233  * Be aware that large numbers can allow the driver to consume a lot of
    234  * kernel memory, especially on writes, and in degraded mode reads.
    235  *
    236  * For example: with a stripe width of 64 blocks (32k) and 5 disks,
    237  * a single 64K write will typically require 64K for the old data,
    238  * 64K for the old parity, and 64K for the new parity, for a total
    239  * of 192K (if the parity buffer is not re-used immediately).
    240  * Even if it is used immediately, that's still 128K, which when multiplied
    241  * by say 10 requests, is 1280K, *on top* of the 640K of incoming data.
    242  *
    243  * Now in degraded mode, for example, a 64K read on the above setup may
    244  * require data reconstruction, which will require *all* of the 4 remaining
    245  * disks to participate -- 4 * 32K/disk == 128K again.
    246  */
    247 
    248 #ifndef RAIDOUTSTANDING
    249 #define RAIDOUTSTANDING   6
    250 #endif
    251 
    252 #define RAIDLABELDEV(dev)	\
    253 	(MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))
    254 
    255 /* declared here, and made public, for the benefit of KVM stuff.. */
    256 struct raid_softc *raid_softc;
    257 
    258 static void raidgetdefaultlabel __P((RF_Raid_t *, struct raid_softc *,
    259 				     struct disklabel *));
    260 static void raidgetdisklabel __P((dev_t));
    261 static void raidmakedisklabel __P((struct raid_softc *));
    262 
    263 static int raidlock __P((struct raid_softc *));
    264 static void raidunlock __P((struct raid_softc *));
    265 
    266 static void rf_markalldirty __P((RF_Raid_t *));
    267 void rf_mountroot_hook __P((struct device *));
    268 
    269 struct device *raidrootdev;
    270 struct cfdata cf_raidrootdev;
    271 struct cfdriver cfdrv;
    272 /* XXX these should be moved up */
    273 #include "rf_configure.h"
    274 #include <sys/reboot.h>
    275 
    276 void rf_ReconThread __P((struct rf_recon_req *));
    277 /* XXX what I want is: */
    278 /*void rf_ReconThread __P((RF_Raid_t *raidPtr));  */
    279 void rf_RewriteParityThread __P((RF_Raid_t *raidPtr));
    280 void rf_CopybackThread __P((RF_Raid_t *raidPtr));
    281 void rf_ReconstructInPlaceThread __P((struct rf_recon_req *));
    282 void rf_buildroothack __P((void *));
    283 
    284 RF_AutoConfig_t *rf_find_raid_components __P((void));
    285 void print_component_label __P((RF_ComponentLabel_t *));
    286 RF_ConfigSet_t *rf_create_auto_sets __P((RF_AutoConfig_t *));
    287 static int rf_does_it_fit __P((RF_ConfigSet_t *,RF_AutoConfig_t *));
    288 static int rf_reasonable_label __P((RF_ComponentLabel_t *));
    289 void rf_create_configuration __P((RF_AutoConfig_t *,RF_Config_t *,
    290 				  RF_Raid_t *));
    291 int rf_set_autoconfig __P((RF_Raid_t *, int));
    292 int rf_set_rootpartition __P((RF_Raid_t *, int));
    293 void rf_release_all_vps __P((RF_ConfigSet_t *));
    294 void rf_cleanup_config_set __P((RF_ConfigSet_t *));
    295 int rf_have_enough_components __P((RF_ConfigSet_t *));
    296 int rf_auto_config_set __P((RF_ConfigSet_t *, int *));
    297 
    298 static int raidautoconfig = 0; /* Debugging, mostly.  Set to 0 to not
    299 				  allow autoconfig to take place */
    300 /* XXX ugly hack. */
    301 const char *raid_rooty = "raid0";
    302 extern struct device *booted_device;
    303 
    304 void
    305 raidattach(num)
    306 	int     num;
    307 {
    308 	int raidID;
    309 	int i, rc;
    310 	RF_AutoConfig_t *ac_list; /* autoconfig list */
    311 	RF_ConfigSet_t *config_sets;
    312 
    313 #ifdef DEBUG
    314 	printf("raidattach: Asked for %d units\n", num);
    315 #endif
    316 
    317 	if (num <= 0) {
    318 #ifdef DIAGNOSTIC
    319 		panic("raidattach: count <= 0");
    320 #endif
    321 		return;
    322 	}
    323 	/* This is where all the initialization stuff gets done. */
    324 
    325 	numraid = num;
    326 
    327 	/* Make some space for requested number of units... */
    328 
    329 	RF_Calloc(raidPtrs, num, sizeof(RF_Raid_t *), (RF_Raid_t **));
    330 	if (raidPtrs == NULL) {
    331 		panic("raidPtrs is NULL!!\n");
    332 	}
    333 
    334 	rc = rf_mutex_init(&rf_sparet_wait_mutex);
    335 	if (rc) {
    336 		RF_PANIC();
    337 	}
    338 
    339 	rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
    340 
    341 	for (i = 0; i < numraid; i++)
    342 		raidPtrs[i] = NULL;
    343 	rc = rf_BootRaidframe();
    344 	if (rc == 0)
    345 		printf("Kernelized RAIDframe activated\n");
    346 	else
    347 		panic("Serious error booting RAID!!\n");
    348 
    349 	/* put together some datastructures like the CCD device does.. This
    350 	 * lets us lock the device and what-not when it gets opened. */
    351 
    352 	raid_softc = (struct raid_softc *)
    353 		malloc(num * sizeof(struct raid_softc),
    354 		       M_RAIDFRAME, M_NOWAIT);
    355 	if (raid_softc == NULL) {
    356 		printf("WARNING: no memory for RAIDframe driver\n");
    357 		return;
    358 	}
    359 
    360 	bzero(raid_softc, num * sizeof(struct raid_softc));
    361 
    362 	raidrootdev = (struct device *)malloc(num * sizeof(struct device),
    363 					      M_RAIDFRAME, M_NOWAIT);
    364 	if (raidrootdev == NULL) {
    365 		panic("No memory for RAIDframe driver!!?!?!\n");
    366 	}
    367 
    368 	for (raidID = 0; raidID < num; raidID++) {
    369 		BUFQ_INIT(&raid_softc[raidID].buf_queue);
    370 
    371 		raidrootdev[raidID].dv_class  = DV_DISK;
    372 		raidrootdev[raidID].dv_cfdata = NULL;
    373 		raidrootdev[raidID].dv_unit   = raidID;
    374 		raidrootdev[raidID].dv_parent = NULL;
    375 		raidrootdev[raidID].dv_flags  = 0;
    376 		sprintf(raidrootdev[raidID].dv_xname,"raid%d",raidID);
    377 
    378 		RF_Calloc(raidPtrs[raidID], 1, sizeof(RF_Raid_t),
    379 			  (RF_Raid_t *));
    380 		if (raidPtrs[raidID] == NULL) {
    381 			printf("WARNING: raidPtrs[%d] is NULL\n", raidID);
    382 			numraid = raidID;
    383 			return;
    384 		}
    385 	}
    386 
    387 if (raidautoconfig) {
    388 	/* 1. locate all RAID components on the system */
    389 
    390 #if DEBUG
    391 	printf("Searching for raid components...\n");
    392 #endif
    393 	ac_list = rf_find_raid_components();
    394 
    395 	/* 2. sort them into their respective sets */
    396 
    397 	config_sets = rf_create_auto_sets(ac_list);
    398 
    399 	/* 3. evaluate each set and configure the valid ones
    400 	   This gets done in rf_buildroothack() */
    401 
    402 	/* schedule the creation of the thread to do the
    403 	   "/ on RAID" stuff */
    404 
    405 	kthread_create(rf_buildroothack,config_sets);
    406 
    407 	/* 4. make sure we get our mud.. I mean root.. hooks in.. */
    408 	/* XXXX pick raid0 for now... and this should be only done
    409 	   if we find something that's bootable!!! */
    410 #if 0
    411 	mountroothook_establish(rf_mountroot_hook, &raidrootdev[0]);
    412 #endif
    413 	if (boothowto & RB_ASKNAME) {
    414 		/* We don't auto-config... */
    415 	} else {
    416 		/* They didn't ask, and we found something bootable... */
    417 		/* XXX pretend for now.. */
    418 #if 0
    419  		booted_device = &raidrootdev[0];
    420 #endif
    421 	}
    422 }
    423 
    424 }
    425 
    426 void
    427 rf_buildroothack(arg)
    428 	void *arg;
    429 {
    430 	RF_ConfigSet_t *config_sets = arg;
    431 	RF_ConfigSet_t *cset;
    432 	RF_ConfigSet_t *next_cset;
    433 	int retcode;
    434 	int raidID;
    435 	int rootID;
    436 	int num_root;
    437 
    438 	num_root = 0;
    439 	cset = config_sets;
    440 	while(cset != NULL ) {
    441 		next_cset = cset->next;
    442 		if (rf_have_enough_components(cset) &&
    443 		    cset->ac->clabel->autoconfigure==1) {
    444 			retcode = rf_auto_config_set(cset,&raidID);
    445 			if (!retcode) {
    446 				if (cset->rootable) {
    447 					rootID = raidID;
    448 					num_root++;
    449 				}
    450 			} else {
    451 				/* The autoconfig didn't work :( */
    452 #if DEBUG
    453 				printf("Autoconfig failed with code %d for raid%d\n", retcode, raidID);
    454 #endif
    455 				rf_release_all_vps(cset);
    456 #if DEBUG
    457 				printf("Done cleanup\n");
    458 #endif
    459 			}
    460 		} else {
    461 			/* we're not autoconfiguring this set...
    462 			   release the associated resources */
    463 #if DEBUG
    464 			printf("Releasing vp's\n");
    465 #endif
    466 			rf_release_all_vps(cset);
    467 #if DEBUG
    468 			printf("Done.\n");
    469 #endif
    470 		}
    471 		/* cleanup */
    472 #if DEBUG
    473 		printf("Cleaning up config set\n");
    474 #endif
    475 		rf_cleanup_config_set(cset);
    476 #if DEBUG
    477 		printf("Done cleanup\n");
    478 #endif
    479 		cset = next_cset;
    480 	}
    481 	if (boothowto & RB_ASKNAME) {
    482 		/* We don't auto-config... */
    483 	} else {
    484 		/* They didn't ask, and we found something bootable... */
    485 		/* XXX pretend for now.. */
    486 		if (num_root == 1) {
    487 #if 1
    488 			booted_device = &raidrootdev[rootID];
    489 #endif
    490 		} else if (num_root > 1) {
    491 			/* we can't guess.. require the user to answer... */
    492 			boothowto |= RB_ASKNAME;
    493 		}
    494 	}
    495 }
    496 
    497 
    498 int
    499 raidsize(dev)
    500 	dev_t   dev;
    501 {
    502 	struct raid_softc *rs;
    503 	struct disklabel *lp;
    504 	int     part, unit, omask, size;
    505 
    506 	unit = raidunit(dev);
    507 	if (unit >= numraid)
    508 		return (-1);
    509 	rs = &raid_softc[unit];
    510 
    511 	if ((rs->sc_flags & RAIDF_INITED) == 0)
    512 		return (-1);
    513 
    514 	part = DISKPART(dev);
    515 	omask = rs->sc_dkdev.dk_openmask & (1 << part);
    516 	lp = rs->sc_dkdev.dk_label;
    517 
    518 	if (omask == 0 && raidopen(dev, 0, S_IFBLK, curproc))
    519 		return (-1);
    520 
    521 	if (lp->d_partitions[part].p_fstype != FS_SWAP)
    522 		size = -1;
    523 	else
    524 		size = lp->d_partitions[part].p_size *
    525 		    (lp->d_secsize / DEV_BSIZE);
    526 
    527 	if (omask == 0 && raidclose(dev, 0, S_IFBLK, curproc))
    528 		return (-1);
    529 
    530 	return (size);
    531 
    532 }
    533 
    534 int
    535 raiddump(dev, blkno, va, size)
    536 	dev_t   dev;
    537 	daddr_t blkno;
    538 	caddr_t va;
    539 	size_t  size;
    540 {
    541 	/* Not implemented. */
    542 	return ENXIO;
    543 }
    544 /* ARGSUSED */
    545 int
    546 raidopen(dev, flags, fmt, p)
    547 	dev_t   dev;
    548 	int     flags, fmt;
    549 	struct proc *p;
    550 {
    551 	int     unit = raidunit(dev);
    552 	struct raid_softc *rs;
    553 	struct disklabel *lp;
    554 	int     part, pmask;
    555 	int     error = 0;
    556 
    557 	if (unit >= numraid)
    558 		return (ENXIO);
    559 	rs = &raid_softc[unit];
    560 
    561 	if ((error = raidlock(rs)) != 0)
    562 		return (error);
    563 	lp = rs->sc_dkdev.dk_label;
    564 
    565 	part = DISKPART(dev);
    566 	pmask = (1 << part);
    567 
    568 	db1_printf(("Opening raid device number: %d partition: %d\n",
    569 		unit, part));
    570 
    571 
    572 	if ((rs->sc_flags & RAIDF_INITED) &&
    573 	    (rs->sc_dkdev.dk_openmask == 0))
    574 		raidgetdisklabel(dev);
    575 
    576 	/* make sure that this partition exists */
    577 
    578 	if (part != RAW_PART) {
    579 		db1_printf(("Not a raw partition..\n"));
    580 		if (((rs->sc_flags & RAIDF_INITED) == 0) ||
    581 		    ((part >= lp->d_npartitions) ||
    582 			(lp->d_partitions[part].p_fstype == FS_UNUSED))) {
    583 			error = ENXIO;
    584 			raidunlock(rs);
    585 			db1_printf(("Bailing out...\n"));
    586 			return (error);
    587 		}
    588 	}
    589 	/* Prevent this unit from being unconfigured while open. */
    590 	switch (fmt) {
    591 	case S_IFCHR:
    592 		rs->sc_dkdev.dk_copenmask |= pmask;
    593 		break;
    594 
    595 	case S_IFBLK:
    596 		rs->sc_dkdev.dk_bopenmask |= pmask;
    597 		break;
    598 	}
    599 
    600 	if ((rs->sc_dkdev.dk_openmask == 0) &&
    601 	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
    602 		/* First one... mark things as dirty... Note that we *MUST*
    603 		 have done a configure before this.  I DO NOT WANT TO BE
    604 		 SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
    605 		 THAT THEY BELONG TOGETHER!!!!! */
    606 		/* XXX should check to see if we're only open for reading
    607 		   here... If so, we needn't do this, but then need some
    608 		   other way of keeping track of what's happened.. */
    609 
    610 		rf_markalldirty( raidPtrs[unit] );
    611 	}
    612 
    613 
    614 	rs->sc_dkdev.dk_openmask =
    615 	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
    616 
    617 	raidunlock(rs);
    618 
    619 	return (error);
    620 
    621 
    622 }
    623 /* ARGSUSED */
    624 int
    625 raidclose(dev, flags, fmt, p)
    626 	dev_t   dev;
    627 	int     flags, fmt;
    628 	struct proc *p;
    629 {
    630 	int     unit = raidunit(dev);
    631 	struct raid_softc *rs;
    632 	int     error = 0;
    633 	int     part;
    634 
    635 	if (unit >= numraid)
    636 		return (ENXIO);
    637 	rs = &raid_softc[unit];
    638 
    639 	if ((error = raidlock(rs)) != 0)
    640 		return (error);
    641 
    642 	part = DISKPART(dev);
    643 
    644 	/* ...that much closer to allowing unconfiguration... */
    645 	switch (fmt) {
    646 	case S_IFCHR:
    647 		rs->sc_dkdev.dk_copenmask &= ~(1 << part);
    648 		break;
    649 
    650 	case S_IFBLK:
    651 		rs->sc_dkdev.dk_bopenmask &= ~(1 << part);
    652 		break;
    653 	}
    654 	rs->sc_dkdev.dk_openmask =
    655 	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
    656 
    657 	if ((rs->sc_dkdev.dk_openmask == 0) &&
    658 	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
    659 		/* Last one... device is not unconfigured yet.
    660 		   Device shutdown has taken care of setting the
    661 		   clean bits if RAIDF_INITED is not set
    662 		   mark things as clean... */
    663 #ifdef DEBUG
    664 		printf("Last one on raid%d.  Updating status.\n",unit);
    665 #endif
    666 		rf_update_component_labels( raidPtrs[unit] );
    667 	}
    668 
    669 	raidunlock(rs);
    670 	return (0);
    671 
    672 }
    673 
    674 void
    675 raidstrategy(bp)
    676 	register struct buf *bp;
    677 {
    678 	register int s;
    679 
    680 	unsigned int raidID = raidunit(bp->b_dev);
    681 	RF_Raid_t *raidPtr;
    682 	struct raid_softc *rs = &raid_softc[raidID];
    683 	struct disklabel *lp;
    684 	int     wlabel;
    685 
    686 	if ((rs->sc_flags & RAIDF_INITED) ==0) {
    687 		bp->b_error = ENXIO;
    688 		bp->b_flags = B_ERROR;
    689 		bp->b_resid = bp->b_bcount;
    690 		biodone(bp);
    691 		return;
    692 	}
    693 	if (raidID >= numraid || !raidPtrs[raidID]) {
    694 		bp->b_error = ENODEV;
    695 		bp->b_flags |= B_ERROR;
    696 		bp->b_resid = bp->b_bcount;
    697 		biodone(bp);
    698 		return;
    699 	}
    700 	raidPtr = raidPtrs[raidID];
    701 	if (!raidPtr->valid) {
    702 		bp->b_error = ENODEV;
    703 		bp->b_flags |= B_ERROR;
    704 		bp->b_resid = bp->b_bcount;
    705 		biodone(bp);
    706 		return;
    707 	}
    708 	if (bp->b_bcount == 0) {
    709 		db1_printf(("b_bcount is zero..\n"));
    710 		biodone(bp);
    711 		return;
    712 	}
    713 	lp = rs->sc_dkdev.dk_label;
    714 
    715 	/*
    716 	 * Do bounds checking and adjust transfer.  If there's an
    717 	 * error, the bounds check will flag that for us.
    718 	 */
    719 
    720 	wlabel = rs->sc_flags & (RAIDF_WLABEL | RAIDF_LABELLING);
    721 	if (DISKPART(bp->b_dev) != RAW_PART)
    722 		if (bounds_check_with_label(bp, lp, wlabel) <= 0) {
    723 			db1_printf(("Bounds check failed!!:%d %d\n",
    724 				(int) bp->b_blkno, (int) wlabel));
    725 			biodone(bp);
    726 			return;
    727 		}
    728 	s = splbio();
    729 
    730 	bp->b_resid = 0;
    731 
    732 	/* stuff it onto our queue */
    733 	BUFQ_INSERT_TAIL(&rs->buf_queue, bp);
    734 
    735 	raidstart(raidPtrs[raidID]);
    736 
    737 	splx(s);
    738 }
    739 /* ARGSUSED */
    740 int
    741 raidread(dev, uio, flags)
    742 	dev_t   dev;
    743 	struct uio *uio;
    744 	int     flags;
    745 {
    746 	int     unit = raidunit(dev);
    747 	struct raid_softc *rs;
    748 	int     part;
    749 
    750 	if (unit >= numraid)
    751 		return (ENXIO);
    752 	rs = &raid_softc[unit];
    753 
    754 	if ((rs->sc_flags & RAIDF_INITED) == 0)
    755 		return (ENXIO);
    756 	part = DISKPART(dev);
    757 
    758 	db1_printf(("raidread: unit: %d partition: %d\n", unit, part));
    759 
    760 	return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));
    761 
    762 }
    763 /* ARGSUSED */
    764 int
    765 raidwrite(dev, uio, flags)
    766 	dev_t   dev;
    767 	struct uio *uio;
    768 	int     flags;
    769 {
    770 	int     unit = raidunit(dev);
    771 	struct raid_softc *rs;
    772 
    773 	if (unit >= numraid)
    774 		return (ENXIO);
    775 	rs = &raid_softc[unit];
    776 
    777 	if ((rs->sc_flags & RAIDF_INITED) == 0)
    778 		return (ENXIO);
    779 	db1_printf(("raidwrite\n"));
    780 	return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));
    781 
    782 }
    783 
    784 int
    785 raidioctl(dev, cmd, data, flag, p)
    786 	dev_t   dev;
    787 	u_long  cmd;
    788 	caddr_t data;
    789 	int     flag;
    790 	struct proc *p;
    791 {
    792 	int     unit = raidunit(dev);
    793 	int     error = 0;
    794 	int     part, pmask;
    795 	struct raid_softc *rs;
    796 	RF_Config_t *k_cfg, *u_cfg;
    797 	RF_Raid_t *raidPtr;
    798 	RF_RaidDisk_t *diskPtr;
    799 	RF_AccTotals_t *totals;
    800 	RF_DeviceConfig_t *d_cfg, **ucfgp;
    801 	u_char *specific_buf;
    802 	int retcode = 0;
    803 	int row;
    804 	int column;
    805 	struct rf_recon_req *rrcopy, *rr;
    806 	RF_ComponentLabel_t *clabel;
    807 	RF_ComponentLabel_t ci_label;
    808 	RF_ComponentLabel_t **clabel_ptr;
    809 	RF_SingleComponent_t *sparePtr,*componentPtr;
    810 	RF_SingleComponent_t hot_spare;
    811 	RF_SingleComponent_t component;
    812 	int i, j, d;
    813 
    814 	if (unit >= numraid)
    815 		return (ENXIO);
    816 	rs = &raid_softc[unit];
    817 	raidPtr = raidPtrs[unit];
    818 
    819 	db1_printf(("raidioctl: %d %d %d %d\n", (int) dev,
    820 		(int) DISKPART(dev), (int) unit, (int) cmd));
    821 
    822 	/* Must be open for writes for these commands... */
    823 	switch (cmd) {
    824 	case DIOCSDINFO:
    825 	case DIOCWDINFO:
    826 	case DIOCWLABEL:
    827 		if ((flag & FWRITE) == 0)
    828 			return (EBADF);
    829 	}
    830 
    831 	/* Must be initialized for these... */
    832 	switch (cmd) {
    833 	case DIOCGDINFO:
    834 	case DIOCSDINFO:
    835 	case DIOCWDINFO:
    836 	case DIOCGPART:
    837 	case DIOCWLABEL:
    838 	case DIOCGDEFLABEL:
    839 	case RAIDFRAME_SHUTDOWN:
    840 	case RAIDFRAME_REWRITEPARITY:
    841 	case RAIDFRAME_GET_INFO:
    842 	case RAIDFRAME_RESET_ACCTOTALS:
    843 	case RAIDFRAME_GET_ACCTOTALS:
    844 	case RAIDFRAME_KEEP_ACCTOTALS:
    845 	case RAIDFRAME_GET_SIZE:
    846 	case RAIDFRAME_FAIL_DISK:
    847 	case RAIDFRAME_COPYBACK:
    848 	case RAIDFRAME_CHECK_RECON_STATUS:
    849 	case RAIDFRAME_GET_COMPONENT_LABEL:
    850 	case RAIDFRAME_SET_COMPONENT_LABEL:
    851 	case RAIDFRAME_ADD_HOT_SPARE:
    852 	case RAIDFRAME_REMOVE_HOT_SPARE:
    853 	case RAIDFRAME_INIT_LABELS:
    854 	case RAIDFRAME_REBUILD_IN_PLACE:
    855 	case RAIDFRAME_CHECK_PARITY:
    856 	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
    857 	case RAIDFRAME_CHECK_COPYBACK_STATUS:
    858 	case RAIDFRAME_SET_AUTOCONFIG:
    859 	case RAIDFRAME_SET_ROOT:
    860 		if ((rs->sc_flags & RAIDF_INITED) == 0)
    861 			return (ENXIO);
    862 	}
    863 
    864 	switch (cmd) {
    865 
    866 		/* configure the system */
    867 	case RAIDFRAME_CONFIGURE:
    868 
    869 		if (raidPtr->valid) {
    870 			/* There is a valid RAID set running on this unit! */
    871 			printf("raid%d: Device already configured!\n",unit);
    872 		}
    873 
    874 		/* copy-in the configuration information */
    875 		/* data points to a pointer to the configuration structure */
    876 
    877 		u_cfg = *((RF_Config_t **) data);
    878 		RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *));
    879 		if (k_cfg == NULL) {
    880 			return (ENOMEM);
    881 		}
    882 		retcode = copyin((caddr_t) u_cfg, (caddr_t) k_cfg,
    883 		    sizeof(RF_Config_t));
    884 		if (retcode) {
    885 			RF_Free(k_cfg, sizeof(RF_Config_t));
    886 			db1_printf(("rf_ioctl: retcode=%d copyin.1\n",
    887 				retcode));
    888 			return (retcode);
    889 		}
    890 		/* allocate a buffer for the layout-specific data, and copy it
    891 		 * in */
    892 		if (k_cfg->layoutSpecificSize) {
    893 			if (k_cfg->layoutSpecificSize > 10000) {
    894 				/* sanity check */
    895 				RF_Free(k_cfg, sizeof(RF_Config_t));
    896 				return (EINVAL);
    897 			}
    898 			RF_Malloc(specific_buf, k_cfg->layoutSpecificSize,
    899 			    (u_char *));
    900 			if (specific_buf == NULL) {
    901 				RF_Free(k_cfg, sizeof(RF_Config_t));
    902 				return (ENOMEM);
    903 			}
    904 			retcode = copyin(k_cfg->layoutSpecific,
    905 			    (caddr_t) specific_buf,
    906 			    k_cfg->layoutSpecificSize);
    907 			if (retcode) {
    908 				RF_Free(k_cfg, sizeof(RF_Config_t));
    909 				RF_Free(specific_buf,
    910 					k_cfg->layoutSpecificSize);
    911 				db1_printf(("rf_ioctl: retcode=%d copyin.2\n",
    912 					retcode));
    913 				return (retcode);
    914 			}
    915 		} else
    916 			specific_buf = NULL;
    917 		k_cfg->layoutSpecific = specific_buf;
    918 
    919 		/* should do some kind of sanity check on the configuration.
    920 		 * Store the sum of all the bytes in the last byte? */
    921 
    922 		/* configure the system */
    923 
    924 		/*
    925 		 * Clear the entire RAID descriptor, just to make sure
    926 		 *  there is no stale data left in the case of a
    927 		 *  reconfiguration
    928 		 */
    929 		bzero((char *) raidPtr, sizeof(RF_Raid_t));
    930 		raidPtr->raidid = unit;
    931 
    932 		retcode = rf_Configure(raidPtr, k_cfg, NULL);
    933 
    934 		if (retcode == 0) {
    935 
    936 			/* allow this many simultaneous IO's to
    937 			   this RAID device */
    938 			raidPtr->openings = RAIDOUTSTANDING;
    939 
    940 			retcode = raidinit(dev, raidPtr, unit);
    941 			rf_markalldirty( raidPtr );
    942 		}
    943 		/* free the buffers.  No return code here. */
    944 		if (k_cfg->layoutSpecificSize) {
    945 			RF_Free(specific_buf, k_cfg->layoutSpecificSize);
    946 		}
    947 		RF_Free(k_cfg, sizeof(RF_Config_t));
    948 
    949 		return (retcode);
    950 
    951 		/* shutdown the system */
    952 	case RAIDFRAME_SHUTDOWN:
    953 
    954 		if ((error = raidlock(rs)) != 0)
    955 			return (error);
    956 
    957 		/*
    958 		 * If somebody has a partition mounted, we shouldn't
    959 		 * shutdown.
    960 		 */
    961 
    962 		part = DISKPART(dev);
    963 		pmask = (1 << part);
    964 		if ((rs->sc_dkdev.dk_openmask & ~pmask) ||
    965 		    ((rs->sc_dkdev.dk_bopenmask & pmask) &&
    966 			(rs->sc_dkdev.dk_copenmask & pmask))) {
    967 			raidunlock(rs);
    968 			return (EBUSY);
    969 		}
    970 
    971 		retcode = rf_Shutdown(raidPtr);
    972 
    973 		pool_destroy(&rs->sc_cbufpool);
    974 
    975 		/* It's no longer initialized... */
    976 		rs->sc_flags &= ~RAIDF_INITED;
    977 
    978 		/* Detach the disk. */
    979 		disk_detach(&rs->sc_dkdev);
    980 
    981 		raidunlock(rs);
    982 
    983 		return (retcode);
    984 	case RAIDFRAME_GET_COMPONENT_LABEL:
    985 		clabel_ptr = (RF_ComponentLabel_t **) data;
    986 		/* need to read the component label for the disk indicated
    987 		   by row,column in clabel */
    988 
    989 		/* For practice, let's get it directly from disk, rather
    990 		   than from the in-core copy */
    991 		RF_Malloc( clabel, sizeof( RF_ComponentLabel_t ),
    992 			   (RF_ComponentLabel_t *));
    993 		if (clabel == NULL)
    994 			return (ENOMEM);
    995 
    996 		bzero((char *) clabel, sizeof(RF_ComponentLabel_t));
    997 
    998 		retcode = copyin( *clabel_ptr, clabel,
    999 				  sizeof(RF_ComponentLabel_t));
   1000 
   1001 		if (retcode) {
   1002 			RF_Free( clabel, sizeof(RF_ComponentLabel_t));
   1003 			return(retcode);
   1004 		}
   1005 
   1006 		row = clabel->row;
   1007 		column = clabel->column;
   1008 
   1009 		if ((row < 0) || (row >= raidPtr->numRow) ||
   1010 		    (column < 0) || (column >= raidPtr->numCol)) {
   1011 			RF_Free( clabel, sizeof(RF_ComponentLabel_t));
   1012 			return(EINVAL);
   1013 		}
   1014 
   1015 		raidread_component_label(raidPtr->Disks[row][column].dev,
   1016 				raidPtr->raid_cinfo[row][column].ci_vp,
   1017 				clabel );
   1018 
   1019 		retcode = copyout((caddr_t) clabel,
   1020 				  (caddr_t) *clabel_ptr,
   1021 				  sizeof(RF_ComponentLabel_t));
   1022 		RF_Free( clabel, sizeof(RF_ComponentLabel_t));
   1023 		return (retcode);
   1024 
   1025 	case RAIDFRAME_SET_COMPONENT_LABEL:
   1026 		clabel = (RF_ComponentLabel_t *) data;
   1027 
   1028 		/* XXX check the label for valid stuff... */
   1029 		/* Note that some things *should not* get modified --
   1030 		   the user should be re-initing the labels instead of
   1031 		   trying to patch things.
   1032 		   */
   1033 
   1034 		printf("Got component label:\n");
   1035 		printf("Version: %d\n",clabel->version);
   1036 		printf("Serial Number: %d\n",clabel->serial_number);
   1037 		printf("Mod counter: %d\n",clabel->mod_counter);
   1038 		printf("Row: %d\n", clabel->row);
   1039 		printf("Column: %d\n", clabel->column);
   1040 		printf("Num Rows: %d\n", clabel->num_rows);
   1041 		printf("Num Columns: %d\n", clabel->num_columns);
   1042 		printf("Clean: %d\n", clabel->clean);
   1043 		printf("Status: %d\n", clabel->status);
   1044 
   1045 		row = clabel->row;
   1046 		column = clabel->column;
   1047 
   1048 		if ((row < 0) || (row >= raidPtr->numRow) ||
   1049 		    (column < 0) || (column >= raidPtr->numCol)) {
   1050 			return(EINVAL);
   1051 		}
   1052 
   1053 		/* XXX this isn't allowed to do anything for now :-) */
   1054 
   1055 		/* XXX and before it is, we need to fill in the rest
   1056 		   of the fields!?!?!?! */
   1057 #if 0
   1058 		raidwrite_component_label(
   1059                             raidPtr->Disks[row][column].dev,
   1060 			    raidPtr->raid_cinfo[row][column].ci_vp,
   1061 			    clabel );
   1062 #endif
   1063 		return (0);
   1064 
   1065 	case RAIDFRAME_INIT_LABELS:
   1066 		clabel = (RF_ComponentLabel_t *) data;
   1067 		/*
   1068 		   we only want the serial number from
   1069 		   the above.  We get all the rest of the information
   1070 		   from the config that was used to create this RAID
   1071 		   set.
   1072 		   */
   1073 
   1074 		raidPtr->serial_number = clabel->serial_number;
   1075 
   1076 		raid_init_component_label(raidPtr, &ci_label);
   1077 		ci_label.serial_number = clabel->serial_number;
   1078 
   1079 		for(row=0;row<raidPtr->numRow;row++) {
   1080 			ci_label.row = row;
   1081 			for(column=0;column<raidPtr->numCol;column++) {
   1082 				diskPtr = &raidPtr->Disks[row][column];
   1083 				ci_label.blockSize = diskPtr->blockSize;
   1084 				ci_label.partitionSize = diskPtr->partitionSize;
   1085 				ci_label.column = column;
   1086 				raidwrite_component_label(
   1087 				  raidPtr->Disks[row][column].dev,
   1088 				  raidPtr->raid_cinfo[row][column].ci_vp,
   1089 				  &ci_label );
   1090 			}
   1091 		}
   1092 
   1093 		return (retcode);
   1094 	case RAIDFRAME_SET_AUTOCONFIG:
   1095 		d = rf_set_autoconfig(raidPtr, *data);
   1096 		printf("New autoconfig value is: %d\n", d);
   1097 		*data = d;
   1098 		return (retcode);
   1099 
   1100 	case RAIDFRAME_SET_ROOT:
   1101 		d = rf_set_rootpartition(raidPtr, *data);
   1102 		printf("New rootpartition value is: %d\n", d);
   1103 		*data = d;
   1104 		return (retcode);
   1105 
   1106 		/* initialize all parity */
   1107 	case RAIDFRAME_REWRITEPARITY:
   1108 
   1109 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1110 			/* Parity for RAID 0 is trivially correct */
   1111 			raidPtr->parity_good = RF_RAID_CLEAN;
   1112 			return(0);
   1113 		}
   1114 
   1115 		if (raidPtr->parity_rewrite_in_progress == 1) {
   1116 			/* Re-write is already in progress! */
   1117 			return(EINVAL);
   1118 		}
   1119 
   1120 		retcode = RF_CREATE_THREAD(raidPtr->parity_rewrite_thread,
   1121 					   rf_RewriteParityThread,
   1122 					   raidPtr,"raid_parity");
   1123 		return (retcode);
   1124 
   1125 
   1126 	case RAIDFRAME_ADD_HOT_SPARE:
   1127 		sparePtr = (RF_SingleComponent_t *) data;
   1128 		memcpy( &hot_spare, sparePtr, sizeof(RF_SingleComponent_t));
   1129 		printf("Adding spare\n");
   1130 		retcode = rf_add_hot_spare(raidPtr, &hot_spare);
   1131 		return(retcode);
   1132 
   1133 	case RAIDFRAME_REMOVE_HOT_SPARE:
   1134 		return(retcode);
   1135 
   1136 	case RAIDFRAME_REBUILD_IN_PLACE:
   1137 
   1138 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1139 			/* Can't do this on a RAID 0!! */
   1140 			return(EINVAL);
   1141 		}
   1142 
   1143 		if (raidPtr->recon_in_progress == 1) {
   1144 			/* a reconstruct is already in progress! */
   1145 			return(EINVAL);
   1146 		}
   1147 
   1148 		componentPtr = (RF_SingleComponent_t *) data;
   1149 		memcpy( &component, componentPtr,
   1150 			sizeof(RF_SingleComponent_t));
   1151 		row = component.row;
   1152 		column = component.column;
   1153 		printf("Rebuild: %d %d\n",row, column);
   1154 		if ((row < 0) || (row >= raidPtr->numRow) ||
   1155 		    (column < 0) || (column >= raidPtr->numCol)) {
   1156 			return(EINVAL);
   1157 		}
   1158 
   1159 		RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
   1160 		if (rrcopy == NULL)
   1161 			return(ENOMEM);
   1162 
   1163 		rrcopy->raidPtr = (void *) raidPtr;
   1164 		rrcopy->row = row;
   1165 		rrcopy->col = column;
   1166 
   1167 		retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
   1168 					   rf_ReconstructInPlaceThread,
   1169 					   rrcopy,"raid_reconip");
   1170 		return(retcode);
   1171 
   1172 	case RAIDFRAME_GET_INFO:
   1173 		if (!raidPtr->valid)
   1174 			return (ENODEV);
   1175 		ucfgp = (RF_DeviceConfig_t **) data;
   1176 		RF_Malloc(d_cfg, sizeof(RF_DeviceConfig_t),
   1177 			  (RF_DeviceConfig_t *));
   1178 		if (d_cfg == NULL)
   1179 			return (ENOMEM);
   1180 		bzero((char *) d_cfg, sizeof(RF_DeviceConfig_t));
   1181 		d_cfg->rows = raidPtr->numRow;
   1182 		d_cfg->cols = raidPtr->numCol;
   1183 		d_cfg->ndevs = raidPtr->numRow * raidPtr->numCol;
   1184 		if (d_cfg->ndevs >= RF_MAX_DISKS) {
   1185 			RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
   1186 			return (ENOMEM);
   1187 		}
   1188 		d_cfg->nspares = raidPtr->numSpare;
   1189 		if (d_cfg->nspares >= RF_MAX_DISKS) {
   1190 			RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
   1191 			return (ENOMEM);
   1192 		}
   1193 		d_cfg->maxqdepth = raidPtr->maxQueueDepth;
   1194 		d = 0;
   1195 		for (i = 0; i < d_cfg->rows; i++) {
   1196 			for (j = 0; j < d_cfg->cols; j++) {
   1197 				d_cfg->devs[d] = raidPtr->Disks[i][j];
   1198 				d++;
   1199 			}
   1200 		}
   1201 		for (j = d_cfg->cols, i = 0; i < d_cfg->nspares; i++, j++) {
   1202 			d_cfg->spares[i] = raidPtr->Disks[0][j];
   1203 		}
   1204 		retcode = copyout((caddr_t) d_cfg, (caddr_t) * ucfgp,
   1205 				  sizeof(RF_DeviceConfig_t));
   1206 		RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
   1207 
   1208 		return (retcode);
   1209 
   1210 	case RAIDFRAME_CHECK_PARITY:
   1211 		*(int *) data = raidPtr->parity_good;
   1212 		return (0);
   1213 
   1214 	case RAIDFRAME_RESET_ACCTOTALS:
   1215 		bzero(&raidPtr->acc_totals, sizeof(raidPtr->acc_totals));
   1216 		return (0);
   1217 
   1218 	case RAIDFRAME_GET_ACCTOTALS:
   1219 		totals = (RF_AccTotals_t *) data;
   1220 		*totals = raidPtr->acc_totals;
   1221 		return (0);
   1222 
   1223 	case RAIDFRAME_KEEP_ACCTOTALS:
   1224 		raidPtr->keep_acc_totals = *(int *)data;
   1225 		return (0);
   1226 
   1227 	case RAIDFRAME_GET_SIZE:
   1228 		*(int *) data = raidPtr->totalSectors;
   1229 		return (0);
   1230 
   1231 		/* fail a disk & optionally start reconstruction */
   1232 	case RAIDFRAME_FAIL_DISK:
   1233 
   1234 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1235 			/* Can't do this on a RAID 0!! */
   1236 			return(EINVAL);
   1237 		}
   1238 
   1239 		rr = (struct rf_recon_req *) data;
   1240 
   1241 		if (rr->row < 0 || rr->row >= raidPtr->numRow
   1242 		    || rr->col < 0 || rr->col >= raidPtr->numCol)
   1243 			return (EINVAL);
   1244 
   1245 		printf("raid%d: Failing the disk: row: %d col: %d\n",
   1246 		       unit, rr->row, rr->col);
   1247 
   1248 		/* make a copy of the recon request so that we don't rely on
   1249 		 * the user's buffer */
   1250 		RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
   1251 		if (rrcopy == NULL)
   1252 			return(ENOMEM);
   1253 		bcopy(rr, rrcopy, sizeof(*rr));
   1254 		rrcopy->raidPtr = (void *) raidPtr;
   1255 
   1256 		retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
   1257 					   rf_ReconThread,
   1258 					   rrcopy,"raid_recon");
   1259 		return (0);
   1260 
   1261 		/* invoke a copyback operation after recon on whatever disk
   1262 		 * needs it, if any */
   1263 	case RAIDFRAME_COPYBACK:
   1264 
   1265 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1266 			/* This makes no sense on a RAID 0!! */
   1267 			return(EINVAL);
   1268 		}
   1269 
   1270 		if (raidPtr->copyback_in_progress == 1) {
   1271 			/* Copyback is already in progress! */
   1272 			return(EINVAL);
   1273 		}
   1274 
   1275 		retcode = RF_CREATE_THREAD(raidPtr->copyback_thread,
   1276 					   rf_CopybackThread,
   1277 					   raidPtr,"raid_copyback");
   1278 		return (retcode);
   1279 
   1280 		/* return the percentage completion of reconstruction */
   1281 	case RAIDFRAME_CHECK_RECON_STATUS:
   1282 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1283 			/* This makes no sense on a RAID 0 */
   1284 			return(EINVAL);
   1285 		}
   1286 		row = 0; /* XXX we only consider a single row... */
   1287 		if (raidPtr->status[row] != rf_rs_reconstructing)
   1288 			*(int *) data = 100;
   1289 		else
   1290 			*(int *) data = raidPtr->reconControl[row]->percentComplete;
   1291 		return (0);
   1292 
   1293 	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
   1294 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1295 			/* This makes no sense on a RAID 0 */
   1296 			return(EINVAL);
   1297 		}
   1298 		if (raidPtr->parity_rewrite_in_progress == 1) {
   1299 			*(int *) data = 100 * raidPtr->parity_rewrite_stripes_done / raidPtr->Layout.numStripe;
   1300 		} else {
   1301 			*(int *) data = 100;
   1302 		}
   1303 		return (0);
   1304 
   1305 	case RAIDFRAME_CHECK_COPYBACK_STATUS:
   1306 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1307 			/* This makes no sense on a RAID 0 */
   1308 			return(EINVAL);
   1309 		}
   1310 		if (raidPtr->copyback_in_progress == 1) {
   1311 			*(int *) data = 100 * raidPtr->copyback_stripes_done /
   1312 				raidPtr->Layout.numStripe;
   1313 		} else {
   1314 			*(int *) data = 100;
   1315 		}
   1316 		return (0);
   1317 
   1318 
   1319 		/* the sparetable daemon calls this to wait for the kernel to
   1320 		 * need a spare table. this ioctl does not return until a
   1321 		 * spare table is needed. XXX -- calling mpsleep here in the
   1322 		 * ioctl code is almost certainly wrong and evil. -- XXX XXX
   1323 		 * -- I should either compute the spare table in the kernel,
   1324 		 * or have a different -- XXX XXX -- interface (a different
   1325 		 * character device) for delivering the table     -- XXX */
   1326 #if 0
   1327 	case RAIDFRAME_SPARET_WAIT:
   1328 		RF_LOCK_MUTEX(rf_sparet_wait_mutex);
   1329 		while (!rf_sparet_wait_queue)
   1330 			mpsleep(&rf_sparet_wait_queue, (PZERO + 1) | PCATCH, "sparet wait", 0, (void *) simple_lock_addr(rf_sparet_wait_mutex), MS_LOCK_SIMPLE);
   1331 		waitreq = rf_sparet_wait_queue;
   1332 		rf_sparet_wait_queue = rf_sparet_wait_queue->next;
   1333 		RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
   1334 
   1335 		/* structure assignment */
   1336 		*((RF_SparetWait_t *) data) = *waitreq;
   1337 
   1338 		RF_Free(waitreq, sizeof(*waitreq));
   1339 		return (0);
   1340 
   1341 		/* wakes up a process waiting on SPARET_WAIT and puts an error
   1342 		 * code in it that will cause the daemon to exit */
   1343 	case RAIDFRAME_ABORT_SPARET_WAIT:
   1344 		RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
   1345 		waitreq->fcol = -1;
   1346 		RF_LOCK_MUTEX(rf_sparet_wait_mutex);
   1347 		waitreq->next = rf_sparet_wait_queue;
   1348 		rf_sparet_wait_queue = waitreq;
   1349 		RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
   1350 		wakeup(&rf_sparet_wait_queue);
   1351 		return (0);
   1352 
   1353 		/* used by the spare table daemon to deliver a spare table
   1354 		 * into the kernel */
   1355 	case RAIDFRAME_SEND_SPARET:
   1356 
   1357 		/* install the spare table */
   1358 		retcode = rf_SetSpareTable(raidPtr, *(void **) data);
   1359 
   1360 		/* respond to the requestor.  the return status of the spare
   1361 		 * table installation is passed in the "fcol" field */
   1362 		RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
   1363 		waitreq->fcol = retcode;
   1364 		RF_LOCK_MUTEX(rf_sparet_wait_mutex);
   1365 		waitreq->next = rf_sparet_resp_queue;
   1366 		rf_sparet_resp_queue = waitreq;
   1367 		wakeup(&rf_sparet_resp_queue);
   1368 		RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
   1369 
   1370 		return (retcode);
   1371 #endif
   1372 
   1373 	default:
   1374 		break; /* fall through to the os-specific code below */
   1375 
   1376 	}
   1377 
   1378 	if (!raidPtr->valid)
   1379 		return (EINVAL);
   1380 
   1381 	/*
   1382 	 * Add support for "regular" device ioctls here.
   1383 	 */
   1384 
   1385 	switch (cmd) {
   1386 	case DIOCGDINFO:
   1387 		*(struct disklabel *) data = *(rs->sc_dkdev.dk_label);
   1388 		break;
   1389 
   1390 	case DIOCGPART:
   1391 		((struct partinfo *) data)->disklab = rs->sc_dkdev.dk_label;
   1392 		((struct partinfo *) data)->part =
   1393 		    &rs->sc_dkdev.dk_label->d_partitions[DISKPART(dev)];
   1394 		break;
   1395 
   1396 	case DIOCWDINFO:
   1397 	case DIOCSDINFO:
   1398 		if ((error = raidlock(rs)) != 0)
   1399 			return (error);
   1400 
   1401 		rs->sc_flags |= RAIDF_LABELLING;
   1402 
   1403 		error = setdisklabel(rs->sc_dkdev.dk_label,
   1404 		    (struct disklabel *) data, 0, rs->sc_dkdev.dk_cpulabel);
   1405 		if (error == 0) {
   1406 			if (cmd == DIOCWDINFO)
   1407 				error = writedisklabel(RAIDLABELDEV(dev),
   1408 				    raidstrategy, rs->sc_dkdev.dk_label,
   1409 				    rs->sc_dkdev.dk_cpulabel);
   1410 		}
   1411 		rs->sc_flags &= ~RAIDF_LABELLING;
   1412 
   1413 		raidunlock(rs);
   1414 
   1415 		if (error)
   1416 			return (error);
   1417 		break;
   1418 
   1419 	case DIOCWLABEL:
   1420 		if (*(int *) data != 0)
   1421 			rs->sc_flags |= RAIDF_WLABEL;
   1422 		else
   1423 			rs->sc_flags &= ~RAIDF_WLABEL;
   1424 		break;
   1425 
   1426 	case DIOCGDEFLABEL:
   1427 		raidgetdefaultlabel(raidPtr, rs,
   1428 		    (struct disklabel *) data);
   1429 		break;
   1430 
   1431 	default:
   1432 		retcode = ENOTTY;
   1433 	}
   1434 	return (retcode);
   1435 
   1436 }
   1437 
   1438 
   1439 /* raidinit -- complete the rest of the initialization for the
   1440    RAIDframe device.  */
   1441 
   1442 
   1443 static int
   1444 raidinit(dev, raidPtr, unit)
   1445 	dev_t   dev;
   1446 	RF_Raid_t *raidPtr;
   1447 	int     unit;
   1448 {
   1449 	int     retcode;
   1450 	struct raid_softc *rs;
   1451 
   1452 	retcode = 0;
   1453 
   1454 	rs = &raid_softc[unit];
   1455 	pool_init(&rs->sc_cbufpool, sizeof(struct raidbuf), 0,
   1456 		  0, 0, "raidpl", 0, NULL, NULL, M_RAIDFRAME);
   1457 
   1458 
   1459 	/* XXX should check return code first... */
   1460 	rs->sc_flags |= RAIDF_INITED;
   1461 
   1462 	sprintf(rs->sc_xname, "raid%d", unit);	/* XXX doesn't check bounds. */
   1463 
   1464 	rs->sc_dkdev.dk_name = rs->sc_xname;
   1465 
   1466 	/* disk_attach actually creates space for the CPU disklabel, among
   1467 	 * other things, so it's critical to call this *BEFORE* we try putzing
   1468 	 * with disklabels. */
   1469 
   1470 	disk_attach(&rs->sc_dkdev);
   1471 
   1472 	/* XXX There may be a weird interaction here between this, and
   1473 	 * protectedSectors, as used in RAIDframe.  */
   1474 
   1475 	rs->sc_size = raidPtr->totalSectors;
   1476 	rs->sc_dev = dev;
   1477 
   1478 	return (retcode);
   1479 }
   1480 
   1481 /* wake up the daemon & tell it to get us a spare table
   1482  * XXX
   1483  * the entries in the queues should be tagged with the raidPtr
   1484  * so that in the extremely rare case that two recons happen at once,
   1485  * we know for which device were requesting a spare table
   1486  * XXX
   1487  *
   1488  * XXX This code is not currently used. GO
   1489  */
   1490 int
   1491 rf_GetSpareTableFromDaemon(req)
   1492 	RF_SparetWait_t *req;
   1493 {
   1494 	int     retcode;
   1495 
   1496 	RF_LOCK_MUTEX(rf_sparet_wait_mutex);
   1497 	req->next = rf_sparet_wait_queue;
   1498 	rf_sparet_wait_queue = req;
   1499 	wakeup(&rf_sparet_wait_queue);
   1500 
   1501 	/* mpsleep unlocks the mutex */
   1502 	while (!rf_sparet_resp_queue) {
   1503 		tsleep(&rf_sparet_resp_queue, PRIBIO,
   1504 		    "raidframe getsparetable", 0);
   1505 	}
   1506 	req = rf_sparet_resp_queue;
   1507 	rf_sparet_resp_queue = req->next;
   1508 	RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
   1509 
   1510 	retcode = req->fcol;
   1511 	RF_Free(req, sizeof(*req));	/* this is not the same req as we
   1512 					 * alloc'd */
   1513 	return (retcode);
   1514 }
   1515 
   1516 /* a wrapper around rf_DoAccess that extracts appropriate info from the
   1517  * bp & passes it down.
   1518  * any calls originating in the kernel must use non-blocking I/O
   1519  * do some extra sanity checking to return "appropriate" error values for
   1520  * certain conditions (to make some standard utilities work)
   1521  *
   1522  * Formerly known as: rf_DoAccessKernel
   1523  */
   1524 void
   1525 raidstart(raidPtr)
   1526 	RF_Raid_t *raidPtr;
   1527 {
   1528 	RF_SectorCount_t num_blocks, pb, sum;
   1529 	RF_RaidAddr_t raid_addr;
   1530 	int     retcode;
   1531 	struct partition *pp;
   1532 	daddr_t blocknum;
   1533 	int     unit;
   1534 	struct raid_softc *rs;
   1535 	int     do_async;
   1536 	struct buf *bp;
   1537 
   1538 	unit = raidPtr->raidid;
   1539 	rs = &raid_softc[unit];
   1540 
   1541 	/* Check to see if we're at the limit... */
   1542 	RF_LOCK_MUTEX(raidPtr->mutex);
   1543 	while (raidPtr->openings > 0) {
   1544 		RF_UNLOCK_MUTEX(raidPtr->mutex);
   1545 
   1546 		/* get the next item, if any, from the queue */
   1547 		if ((bp = BUFQ_FIRST(&rs->buf_queue)) == NULL) {
   1548 			/* nothing more to do */
   1549 			return;
   1550 		}
   1551 		BUFQ_REMOVE(&rs->buf_queue, bp);
   1552 
   1553 		/* Ok, for the bp we have here, bp->b_blkno is relative to the
   1554 		 * partition.. Need to make it absolute to the underlying
   1555 		 * device.. */
   1556 
   1557 		blocknum = bp->b_blkno;
   1558 		if (DISKPART(bp->b_dev) != RAW_PART) {
   1559 			pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)];
   1560 			blocknum += pp->p_offset;
   1561 		}
   1562 
   1563 		db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno,
   1564 			    (int) blocknum));
   1565 
   1566 		db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
   1567 		db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));
   1568 
   1569 		/* *THIS* is where we adjust what block we're going to...
   1570 		 * but DO NOT TOUCH bp->b_blkno!!! */
   1571 		raid_addr = blocknum;
   1572 
   1573 		num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
   1574 		pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
   1575 		sum = raid_addr + num_blocks + pb;
   1576 		if (1 || rf_debugKernelAccess) {
   1577 			db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
   1578 				    (int) raid_addr, (int) sum, (int) num_blocks,
   1579 				    (int) pb, (int) bp->b_resid));
   1580 		}
   1581 		if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
   1582 		    || (sum < num_blocks) || (sum < pb)) {
   1583 			bp->b_error = ENOSPC;
   1584 			bp->b_flags |= B_ERROR;
   1585 			bp->b_resid = bp->b_bcount;
   1586 			biodone(bp);
   1587 			RF_LOCK_MUTEX(raidPtr->mutex);
   1588 			continue;
   1589 		}
   1590 		/*
   1591 		 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
   1592 		 */
   1593 
   1594 		if (bp->b_bcount & raidPtr->sectorMask) {
   1595 			bp->b_error = EINVAL;
   1596 			bp->b_flags |= B_ERROR;
   1597 			bp->b_resid = bp->b_bcount;
   1598 			biodone(bp);
   1599 			RF_LOCK_MUTEX(raidPtr->mutex);
   1600 			continue;
   1601 
   1602 		}
   1603 		db1_printf(("Calling DoAccess..\n"));
   1604 
   1605 
   1606 		RF_LOCK_MUTEX(raidPtr->mutex);
   1607 		raidPtr->openings--;
   1608 		RF_UNLOCK_MUTEX(raidPtr->mutex);
   1609 
   1610 		/*
   1611 		 * Everything is async.
   1612 		 */
   1613 		do_async = 1;
   1614 
   1615 		/* don't ever condition on bp->b_flags & B_WRITE.
   1616 		 * always condition on B_READ instead */
   1617 
   1618 		/* XXX we're still at splbio() here... do we *really*
   1619 		   need to be? */
   1620 
   1621 
   1622 		retcode = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
   1623 				      RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
   1624 				      do_async, raid_addr, num_blocks,
   1625 				      bp->b_un.b_addr, bp, NULL, NULL,
   1626 				      RF_DAG_NONBLOCKING_IO, NULL, NULL, NULL);
   1627 
   1628 
   1629 		RF_LOCK_MUTEX(raidPtr->mutex);
   1630 	}
   1631 	RF_UNLOCK_MUTEX(raidPtr->mutex);
   1632 }
   1633 
   1634 
   1635 
   1636 
   1637 /* invoke an I/O from kernel mode.  Disk queue should be locked upon entry */
   1638 
   1639 int
   1640 rf_DispatchKernelIO(queue, req)
   1641 	RF_DiskQueue_t *queue;
   1642 	RF_DiskQueueData_t *req;
   1643 {
   1644 	int     op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
   1645 	struct buf *bp;
   1646 	struct raidbuf *raidbp = NULL;
   1647 	struct raid_softc *rs;
   1648 	int     unit;
   1649 	int s;
   1650 
   1651 	s=0;
   1652 	/* s = splbio();*/ /* want to test this */
   1653 	/* XXX along with the vnode, we also need the softc associated with
   1654 	 * this device.. */
   1655 
   1656 	req->queue = queue;
   1657 
   1658 	unit = queue->raidPtr->raidid;
   1659 
   1660 	db1_printf(("DispatchKernelIO unit: %d\n", unit));
   1661 
   1662 	if (unit >= numraid) {
   1663 		printf("Invalid unit number: %d %d\n", unit, numraid);
   1664 		panic("Invalid Unit number in rf_DispatchKernelIO\n");
   1665 	}
   1666 	rs = &raid_softc[unit];
   1667 
   1668 	/* XXX is this the right place? */
   1669 	disk_busy(&rs->sc_dkdev);
   1670 
   1671 	bp = req->bp;
   1672 #if 1
   1673 	/* XXX when there is a physical disk failure, someone is passing us a
   1674 	 * buffer that contains old stuff!!  Attempt to deal with this problem
   1675 	 * without taking a performance hit... (not sure where the real bug
   1676 	 * is.  It's buried in RAIDframe somewhere) :-(  GO ) */
   1677 
   1678 	if (bp->b_flags & B_ERROR) {
   1679 		bp->b_flags &= ~B_ERROR;
   1680 	}
   1681 	if (bp->b_error != 0) {
   1682 		bp->b_error = 0;
   1683 	}
   1684 #endif
   1685 	raidbp = RAIDGETBUF(rs);
   1686 
   1687 	raidbp->rf_flags = 0;	/* XXX not really used anywhere... */
   1688 
   1689 	/*
   1690 	 * context for raidiodone
   1691 	 */
   1692 	raidbp->rf_obp = bp;
   1693 	raidbp->req = req;
   1694 
   1695 	LIST_INIT(&raidbp->rf_buf.b_dep);
   1696 
   1697 	switch (req->type) {
   1698 	case RF_IO_TYPE_NOP:	/* used primarily to unlock a locked queue */
   1699 		/* XXX need to do something extra here.. */
   1700 		/* I'm leaving this in, as I've never actually seen it used,
   1701 		 * and I'd like folks to report it... GO */
   1702 		printf(("WAKEUP CALLED\n"));
   1703 		queue->numOutstanding++;
   1704 
   1705 		/* XXX need to glue the original buffer into this??  */
   1706 
   1707 		KernelWakeupFunc(&raidbp->rf_buf);
   1708 		break;
   1709 
   1710 	case RF_IO_TYPE_READ:
   1711 	case RF_IO_TYPE_WRITE:
   1712 
   1713 		if (req->tracerec) {
   1714 			RF_ETIMER_START(req->tracerec->timer);
   1715 		}
   1716 		InitBP(&raidbp->rf_buf, queue->rf_cinfo->ci_vp,
   1717 		    op | bp->b_flags, queue->rf_cinfo->ci_dev,
   1718 		    req->sectorOffset, req->numSector,
   1719 		    req->buf, KernelWakeupFunc, (void *) req,
   1720 		    queue->raidPtr->logBytesPerSector, req->b_proc);
   1721 
   1722 		if (rf_debugKernelAccess) {
   1723 			db1_printf(("dispatch: bp->b_blkno = %ld\n",
   1724 				(long) bp->b_blkno));
   1725 		}
   1726 		queue->numOutstanding++;
   1727 		queue->last_deq_sector = req->sectorOffset;
   1728 		/* acc wouldn't have been let in if there were any pending
   1729 		 * reqs at any other priority */
   1730 		queue->curPriority = req->priority;
   1731 
   1732 		db1_printf(("Going for %c to unit %d row %d col %d\n",
   1733 			req->type, unit, queue->row, queue->col));
   1734 		db1_printf(("sector %d count %d (%d bytes) %d\n",
   1735 			(int) req->sectorOffset, (int) req->numSector,
   1736 			(int) (req->numSector <<
   1737 			    queue->raidPtr->logBytesPerSector),
   1738 			(int) queue->raidPtr->logBytesPerSector));
   1739 		if ((raidbp->rf_buf.b_flags & B_READ) == 0) {
   1740 			raidbp->rf_buf.b_vp->v_numoutput++;
   1741 		}
   1742 		VOP_STRATEGY(&raidbp->rf_buf);
   1743 
   1744 		break;
   1745 
   1746 	default:
   1747 		panic("bad req->type in rf_DispatchKernelIO");
   1748 	}
   1749 	db1_printf(("Exiting from DispatchKernelIO\n"));
   1750 	/* splx(s); */ /* want to test this */
   1751 	return (0);
   1752 }
   1753 /* this is the callback function associated with a I/O invoked from
   1754    kernel code.
   1755  */
   1756 static void
   1757 KernelWakeupFunc(vbp)
   1758 	struct buf *vbp;
   1759 {
   1760 	RF_DiskQueueData_t *req = NULL;
   1761 	RF_DiskQueue_t *queue;
   1762 	struct raidbuf *raidbp = (struct raidbuf *) vbp;
   1763 	struct buf *bp;
   1764 	struct raid_softc *rs;
   1765 	int     unit;
   1766 	register int s;
   1767 
   1768 	s = splbio();
   1769 	db1_printf(("recovering the request queue:\n"));
   1770 	req = raidbp->req;
   1771 
   1772 	bp = raidbp->rf_obp;
   1773 
   1774 	queue = (RF_DiskQueue_t *) req->queue;
   1775 
   1776 	if (raidbp->rf_buf.b_flags & B_ERROR) {
   1777 		bp->b_flags |= B_ERROR;
   1778 		bp->b_error = raidbp->rf_buf.b_error ?
   1779 		    raidbp->rf_buf.b_error : EIO;
   1780 	}
   1781 
   1782 	/* XXX methinks this could be wrong... */
   1783 #if 1
   1784 	bp->b_resid = raidbp->rf_buf.b_resid;
   1785 #endif
   1786 
   1787 	if (req->tracerec) {
   1788 		RF_ETIMER_STOP(req->tracerec->timer);
   1789 		RF_ETIMER_EVAL(req->tracerec->timer);
   1790 		RF_LOCK_MUTEX(rf_tracing_mutex);
   1791 		req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
   1792 		req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
   1793 		req->tracerec->num_phys_ios++;
   1794 		RF_UNLOCK_MUTEX(rf_tracing_mutex);
   1795 	}
   1796 	bp->b_bcount = raidbp->rf_buf.b_bcount;	/* XXXX ?? */
   1797 
   1798 	unit = queue->raidPtr->raidid;	/* *Much* simpler :-> */
   1799 
   1800 
   1801 	/* XXX Ok, let's get aggressive... If B_ERROR is set, let's go
   1802 	 * ballistic, and mark the component as hosed... */
   1803 
   1804 	if (bp->b_flags & B_ERROR) {
   1805 		/* Mark the disk as dead */
   1806 		/* but only mark it once... */
   1807 		if (queue->raidPtr->Disks[queue->row][queue->col].status ==
   1808 		    rf_ds_optimal) {
   1809 			printf("raid%d: IO Error.  Marking %s as failed.\n",
   1810 			    unit, queue->raidPtr->Disks[queue->row][queue->col].devname);
   1811 			queue->raidPtr->Disks[queue->row][queue->col].status =
   1812 			    rf_ds_failed;
   1813 			queue->raidPtr->status[queue->row] = rf_rs_degraded;
   1814 			queue->raidPtr->numFailures++;
   1815 			/* XXX here we should bump the version number for each component, and write that data out */
   1816 		} else {	/* Disk is already dead... */
   1817 			/* printf("Disk already marked as dead!\n"); */
   1818 		}
   1819 
   1820 	}
   1821 
   1822 	rs = &raid_softc[unit];
   1823 	RAIDPUTBUF(rs, raidbp);
   1824 
   1825 
   1826 	if (bp->b_resid == 0) {
   1827 		/* XXX is this the right place for a disk_unbusy()??!??!?!? */
   1828 		disk_unbusy(&rs->sc_dkdev, (bp->b_bcount - bp->b_resid));
   1829 	}
   1830 
   1831 	rf_DiskIOComplete(queue, req, (bp->b_flags & B_ERROR) ? 1 : 0);
   1832 	(req->CompleteFunc) (req->argument, (bp->b_flags & B_ERROR) ? 1 : 0);
   1833 
   1834 	splx(s);
   1835 }
   1836 
   1837 
   1838 
   1839 /*
   1840  * initialize a buf structure for doing an I/O in the kernel.
   1841  */
   1842 static void
   1843 InitBP(
   1844     struct buf * bp,
   1845     struct vnode * b_vp,
   1846     unsigned rw_flag,
   1847     dev_t dev,
   1848     RF_SectorNum_t startSect,
   1849     RF_SectorCount_t numSect,
   1850     caddr_t buf,
   1851     void (*cbFunc) (struct buf *),
   1852     void *cbArg,
   1853     int logBytesPerSector,
   1854     struct proc * b_proc)
   1855 {
   1856 	/* bp->b_flags       = B_PHYS | rw_flag; */
   1857 	bp->b_flags = B_CALL | rw_flag;	/* XXX need B_PHYS here too??? */
   1858 	bp->b_bcount = numSect << logBytesPerSector;
   1859 	bp->b_bufsize = bp->b_bcount;
   1860 	bp->b_error = 0;
   1861 	bp->b_dev = dev;
   1862 	bp->b_un.b_addr = buf;
   1863 	bp->b_blkno = startSect;
   1864 	bp->b_resid = bp->b_bcount;	/* XXX is this right!??!?!! */
   1865 	if (bp->b_bcount == 0) {
   1866 		panic("bp->b_bcount is zero in InitBP!!\n");
   1867 	}
   1868 	bp->b_proc = b_proc;
   1869 	bp->b_iodone = cbFunc;
   1870 	bp->b_vp = b_vp;
   1871 
   1872 }
   1873 
   1874 static void
   1875 raidgetdefaultlabel(raidPtr, rs, lp)
   1876 	RF_Raid_t *raidPtr;
   1877 	struct raid_softc *rs;
   1878 	struct disklabel *lp;
   1879 {
   1880 	db1_printf(("Building a default label...\n"));
   1881 	bzero(lp, sizeof(*lp));
   1882 
   1883 	/* fabricate a label... */
   1884 	lp->d_secperunit = raidPtr->totalSectors;
   1885 	lp->d_secsize = raidPtr->bytesPerSector;
   1886 	lp->d_nsectors = raidPtr->Layout.dataSectorsPerStripe;
   1887 	lp->d_ntracks = 1;
   1888 	lp->d_ncylinders = raidPtr->totalSectors /
   1889 		(lp->d_nsectors * lp->d_ntracks);
   1890 	lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
   1891 
   1892 	strncpy(lp->d_typename, "raid", sizeof(lp->d_typename));
   1893 	lp->d_type = DTYPE_RAID;
   1894 	strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
   1895 	lp->d_rpm = 3600;
   1896 	lp->d_interleave = 1;
   1897 	lp->d_flags = 0;
   1898 
   1899 	lp->d_partitions[RAW_PART].p_offset = 0;
   1900 	lp->d_partitions[RAW_PART].p_size = raidPtr->totalSectors;
   1901 	lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
   1902 	lp->d_npartitions = RAW_PART + 1;
   1903 
   1904 	lp->d_magic = DISKMAGIC;
   1905 	lp->d_magic2 = DISKMAGIC;
   1906 	lp->d_checksum = dkcksum(rs->sc_dkdev.dk_label);
   1907 
   1908 }
   1909 /*
   1910  * Read the disklabel from the raid device.  If one is not present, fake one
   1911  * up.
   1912  */
   1913 static void
   1914 raidgetdisklabel(dev)
   1915 	dev_t   dev;
   1916 {
   1917 	int     unit = raidunit(dev);
   1918 	struct raid_softc *rs = &raid_softc[unit];
   1919 	char   *errstring;
   1920 	struct disklabel *lp = rs->sc_dkdev.dk_label;
   1921 	struct cpu_disklabel *clp = rs->sc_dkdev.dk_cpulabel;
   1922 	RF_Raid_t *raidPtr;
   1923 
   1924 	db1_printf(("Getting the disklabel...\n"));
   1925 
   1926 	bzero(clp, sizeof(*clp));
   1927 
   1928 	raidPtr = raidPtrs[unit];
   1929 
   1930 	raidgetdefaultlabel(raidPtr, rs, lp);
   1931 
   1932 	/*
   1933 	 * Call the generic disklabel extraction routine.
   1934 	 */
   1935 	errstring = readdisklabel(RAIDLABELDEV(dev), raidstrategy,
   1936 	    rs->sc_dkdev.dk_label, rs->sc_dkdev.dk_cpulabel);
   1937 	if (errstring)
   1938 		raidmakedisklabel(rs);
   1939 	else {
   1940 		int     i;
   1941 		struct partition *pp;
   1942 
   1943 		/*
   1944 		 * Sanity check whether the found disklabel is valid.
   1945 		 *
   1946 		 * This is necessary since total size of the raid device
   1947 		 * may vary when an interleave is changed even though exactly
   1948 		 * same componets are used, and old disklabel may used
   1949 		 * if that is found.
   1950 		 */
   1951 		if (lp->d_secperunit != rs->sc_size)
   1952 			printf("WARNING: %s: "
   1953 			    "total sector size in disklabel (%d) != "
   1954 			    "the size of raid (%ld)\n", rs->sc_xname,
   1955 			    lp->d_secperunit, (long) rs->sc_size);
   1956 		for (i = 0; i < lp->d_npartitions; i++) {
   1957 			pp = &lp->d_partitions[i];
   1958 			if (pp->p_offset + pp->p_size > rs->sc_size)
   1959 				printf("WARNING: %s: end of partition `%c' "
   1960 				    "exceeds the size of raid (%ld)\n",
   1961 				    rs->sc_xname, 'a' + i, (long) rs->sc_size);
   1962 		}
   1963 	}
   1964 
   1965 }
   1966 /*
   1967  * Take care of things one might want to take care of in the event
   1968  * that a disklabel isn't present.
   1969  */
   1970 static void
   1971 raidmakedisklabel(rs)
   1972 	struct raid_softc *rs;
   1973 {
   1974 	struct disklabel *lp = rs->sc_dkdev.dk_label;
   1975 	db1_printf(("Making a label..\n"));
   1976 
   1977 	/*
   1978 	 * For historical reasons, if there's no disklabel present
   1979 	 * the raw partition must be marked FS_BSDFFS.
   1980 	 */
   1981 
   1982 	lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;
   1983 
   1984 	strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));
   1985 
   1986 	lp->d_checksum = dkcksum(lp);
   1987 }
   1988 /*
   1989  * Lookup the provided name in the filesystem.  If the file exists,
   1990  * is a valid block device, and isn't being used by anyone else,
   1991  * set *vpp to the file's vnode.
   1992  * You'll find the original of this in ccd.c
   1993  */
   1994 int
   1995 raidlookup(path, p, vpp)
   1996 	char   *path;
   1997 	struct proc *p;
   1998 	struct vnode **vpp;	/* result */
   1999 {
   2000 	struct nameidata nd;
   2001 	struct vnode *vp;
   2002 	struct vattr va;
   2003 	int     error;
   2004 
   2005 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, path, p);
   2006 	if ((error = vn_open(&nd, FREAD | FWRITE, 0)) != 0) {
   2007 #ifdef DEBUG
   2008 		printf("RAIDframe: vn_open returned %d\n", error);
   2009 #endif
   2010 		return (error);
   2011 	}
   2012 	vp = nd.ni_vp;
   2013 	if (vp->v_usecount > 1) {
   2014 		VOP_UNLOCK(vp, 0);
   2015 		(void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
   2016 		return (EBUSY);
   2017 	}
   2018 	if ((error = VOP_GETATTR(vp, &va, p->p_ucred, p)) != 0) {
   2019 		VOP_UNLOCK(vp, 0);
   2020 		(void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
   2021 		return (error);
   2022 	}
   2023 	/* XXX: eventually we should handle VREG, too. */
   2024 	if (va.va_type != VBLK) {
   2025 		VOP_UNLOCK(vp, 0);
   2026 		(void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
   2027 		return (ENOTBLK);
   2028 	}
   2029 	VOP_UNLOCK(vp, 0);
   2030 	*vpp = vp;
   2031 	return (0);
   2032 }
   2033 /*
   2034  * Wait interruptibly for an exclusive lock.
   2035  *
   2036  * XXX
   2037  * Several drivers do this; it should be abstracted and made MP-safe.
   2038  * (Hmm... where have we seen this warning before :->  GO )
   2039  */
   2040 static int
   2041 raidlock(rs)
   2042 	struct raid_softc *rs;
   2043 {
   2044 	int     error;
   2045 
   2046 	while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
   2047 		rs->sc_flags |= RAIDF_WANTED;
   2048 		if ((error =
   2049 			tsleep(rs, PRIBIO | PCATCH, "raidlck", 0)) != 0)
   2050 			return (error);
   2051 	}
   2052 	rs->sc_flags |= RAIDF_LOCKED;
   2053 	return (0);
   2054 }
   2055 /*
   2056  * Unlock and wake up any waiters.
   2057  */
   2058 static void
   2059 raidunlock(rs)
   2060 	struct raid_softc *rs;
   2061 {
   2062 
   2063 	rs->sc_flags &= ~RAIDF_LOCKED;
   2064 	if ((rs->sc_flags & RAIDF_WANTED) != 0) {
   2065 		rs->sc_flags &= ~RAIDF_WANTED;
   2066 		wakeup(rs);
   2067 	}
   2068 }
   2069 
   2070 
   2071 #define RF_COMPONENT_INFO_OFFSET  16384 /* bytes */
   2072 #define RF_COMPONENT_INFO_SIZE     1024 /* bytes */
   2073 
   2074 int
   2075 raidmarkclean(dev_t dev, struct vnode *b_vp, int mod_counter)
   2076 {
   2077 	RF_ComponentLabel_t clabel;
   2078 	raidread_component_label(dev, b_vp, &clabel);
   2079 	clabel.mod_counter = mod_counter;
   2080 	clabel.clean = RF_RAID_CLEAN;
   2081 	raidwrite_component_label(dev, b_vp, &clabel);
   2082 	return(0);
   2083 }
   2084 
   2085 
   2086 int
   2087 raidmarkdirty(dev_t dev, struct vnode *b_vp, int mod_counter)
   2088 {
   2089 	RF_ComponentLabel_t clabel;
   2090 	raidread_component_label(dev, b_vp, &clabel);
   2091 	clabel.mod_counter = mod_counter;
   2092 	clabel.clean = RF_RAID_DIRTY;
   2093 	raidwrite_component_label(dev, b_vp, &clabel);
   2094 	return(0);
   2095 }
   2096 
   2097 /* ARGSUSED */
   2098 int
   2099 raidread_component_label(dev, b_vp, clabel)
   2100 	dev_t dev;
   2101 	struct vnode *b_vp;
   2102 	RF_ComponentLabel_t *clabel;
   2103 {
   2104 	struct buf *bp;
   2105 	int error;
   2106 
   2107 	/* XXX should probably ensure that we don't try to do this if
   2108 	   someone has changed rf_protected_sectors. */
   2109 
   2110 	/* get a block of the appropriate size... */
   2111 	bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
   2112 	bp->b_dev = dev;
   2113 
   2114 	/* get our ducks in a row for the read */
   2115 	bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
   2116 	bp->b_bcount = RF_COMPONENT_INFO_SIZE;
   2117 	bp->b_flags = B_BUSY | B_READ;
   2118  	bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
   2119 
   2120 	(*bdevsw[major(bp->b_dev)].d_strategy)(bp);
   2121 
   2122 	error = biowait(bp);
   2123 
   2124 	if (!error) {
   2125 		memcpy(clabel, bp->b_un.b_addr,
   2126 		       sizeof(RF_ComponentLabel_t));
   2127 #if 0
   2128 		print_component_label( clabel );
   2129 #endif
   2130         } else {
   2131 #if 0
   2132 		printf("Failed to read RAID component label!\n");
   2133 #endif
   2134 	}
   2135 
   2136         bp->b_flags = B_INVAL | B_AGE;
   2137 	brelse(bp);
   2138 	return(error);
   2139 }
   2140 /* ARGSUSED */
   2141 int
   2142 raidwrite_component_label(dev, b_vp, clabel)
   2143 	dev_t dev;
   2144 	struct vnode *b_vp;
   2145 	RF_ComponentLabel_t *clabel;
   2146 {
   2147 	struct buf *bp;
   2148 	int error;
   2149 
   2150 	/* get a block of the appropriate size... */
   2151 	bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
   2152 	bp->b_dev = dev;
   2153 
   2154 	/* get our ducks in a row for the write */
   2155 	bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
   2156 	bp->b_bcount = RF_COMPONENT_INFO_SIZE;
   2157 	bp->b_flags = B_BUSY | B_WRITE;
   2158  	bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
   2159 
   2160 	memset( bp->b_un.b_addr, 0, RF_COMPONENT_INFO_SIZE );
   2161 
   2162 	memcpy( bp->b_un.b_addr, clabel, sizeof(RF_ComponentLabel_t));
   2163 
   2164 	(*bdevsw[major(bp->b_dev)].d_strategy)(bp);
   2165 	error = biowait(bp);
   2166         bp->b_flags = B_INVAL | B_AGE;
   2167 	brelse(bp);
   2168 	if (error) {
   2169 #if 1
   2170 		printf("Failed to write RAID component info!\n");
   2171 #endif
   2172 	}
   2173 
   2174 	return(error);
   2175 }
   2176 
   2177 void
   2178 rf_markalldirty( raidPtr )
   2179 	RF_Raid_t *raidPtr;
   2180 {
   2181 	RF_ComponentLabel_t clabel;
   2182 	int r,c;
   2183 
   2184 	raidPtr->mod_counter++;
   2185 	for (r = 0; r < raidPtr->numRow; r++) {
   2186 		for (c = 0; c < raidPtr->numCol; c++) {
   2187 			if (raidPtr->Disks[r][c].status != rf_ds_failed) {
   2188 				raidread_component_label(
   2189 					raidPtr->Disks[r][c].dev,
   2190 					raidPtr->raid_cinfo[r][c].ci_vp,
   2191 					&clabel);
   2192 				if (clabel.status == rf_ds_spared) {
   2193 					/* XXX do something special...
   2194 					 but whatever you do, don't
   2195 					 try to access it!! */
   2196 				} else {
   2197 #if 0
   2198 				clabel.status =
   2199 					raidPtr->Disks[r][c].status;
   2200 				raidwrite_component_label(
   2201 					raidPtr->Disks[r][c].dev,
   2202 					raidPtr->raid_cinfo[r][c].ci_vp,
   2203 					&clabel);
   2204 #endif
   2205 				raidmarkdirty(
   2206 				       raidPtr->Disks[r][c].dev,
   2207 				       raidPtr->raid_cinfo[r][c].ci_vp,
   2208 				       raidPtr->mod_counter);
   2209 				}
   2210 			}
   2211 		}
   2212 	}
   2213 	/* printf("Component labels marked dirty.\n"); */
   2214 #if 0
   2215 	for( c = 0; c < raidPtr->numSpare ; c++) {
   2216 		sparecol = raidPtr->numCol + c;
   2217 		if (raidPtr->Disks[r][sparecol].status == rf_ds_used_spare) {
   2218 			/*
   2219 
   2220 			   XXX this is where we get fancy and map this spare
   2221 			   into it's correct spot in the array.
   2222 
   2223 			 */
   2224 			/*
   2225 
   2226 			   we claim this disk is "optimal" if it's
   2227 			   rf_ds_used_spare, as that means it should be
   2228 			   directly substitutable for the disk it replaced.
   2229 			   We note that too...
   2230 
   2231 			 */
   2232 
   2233 			for(i=0;i<raidPtr->numRow;i++) {
   2234 				for(j=0;j<raidPtr->numCol;j++) {
   2235 					if ((raidPtr->Disks[i][j].spareRow ==
   2236 					     r) &&
   2237 					    (raidPtr->Disks[i][j].spareCol ==
   2238 					     sparecol)) {
   2239 						srow = r;
   2240 						scol = sparecol;
   2241 						break;
   2242 					}
   2243 				}
   2244 			}
   2245 
   2246 			raidread_component_label(
   2247 				      raidPtr->Disks[r][sparecol].dev,
   2248 				      raidPtr->raid_cinfo[r][sparecol].ci_vp,
   2249 				      &clabel);
   2250 			/* make sure status is noted */
   2251 			clabel.version = RF_COMPONENT_LABEL_VERSION;
   2252 			clabel.mod_counter = raidPtr->mod_counter;
   2253 			clabel.serial_number = raidPtr->serial_number;
   2254 			clabel.row = srow;
   2255 			clabel.column = scol;
   2256 			clabel.num_rows = raidPtr->numRow;
   2257 			clabel.num_columns = raidPtr->numCol;
   2258 			clabel.clean = RF_RAID_DIRTY; /* changed in a bit*/
   2259 			clabel.status = rf_ds_optimal;
   2260 			raidwrite_component_label(
   2261 				      raidPtr->Disks[r][sparecol].dev,
   2262 				      raidPtr->raid_cinfo[r][sparecol].ci_vp,
   2263 				      &clabel);
   2264 			raidmarkclean( raidPtr->Disks[r][sparecol].dev,
   2265 			              raidPtr->raid_cinfo[r][sparecol].ci_vp);
   2266 		}
   2267 	}
   2268 
   2269 #endif
   2270 }
   2271 
   2272 
   2273 void
   2274 rf_update_component_labels( raidPtr )
   2275 	RF_Raid_t *raidPtr;
   2276 {
   2277 	RF_ComponentLabel_t clabel;
   2278 	int sparecol;
   2279 	int r,c;
   2280 	int i,j;
   2281 	int srow, scol;
   2282 
   2283 	srow = -1;
   2284 	scol = -1;
   2285 
   2286 	/* XXX should do extra checks to make sure things really are clean,
   2287 	   rather than blindly setting the clean bit... */
   2288 
   2289 	raidPtr->mod_counter++;
   2290 
   2291 	for (r = 0; r < raidPtr->numRow; r++) {
   2292 		for (c = 0; c < raidPtr->numCol; c++) {
   2293 			if (raidPtr->Disks[r][c].status == rf_ds_optimal) {
   2294 				raidread_component_label(
   2295 					raidPtr->Disks[r][c].dev,
   2296 					raidPtr->raid_cinfo[r][c].ci_vp,
   2297 					&clabel);
   2298 				/* make sure status is noted */
   2299 				clabel.status = rf_ds_optimal;
   2300 				raidwrite_component_label(
   2301 					raidPtr->Disks[r][c].dev,
   2302 					raidPtr->raid_cinfo[r][c].ci_vp,
   2303 					&clabel);
   2304 				if (raidPtr->parity_good == RF_RAID_CLEAN) {
   2305 					raidmarkclean(
   2306 					      raidPtr->Disks[r][c].dev,
   2307 					      raidPtr->raid_cinfo[r][c].ci_vp,
   2308 					      raidPtr->mod_counter);
   2309 				}
   2310 			}
   2311 			/* else we don't touch it.. */
   2312 #if 0
   2313 			else if (raidPtr->Disks[r][c].status !=
   2314 				   rf_ds_failed) {
   2315 				raidread_component_label(
   2316 					raidPtr->Disks[r][c].dev,
   2317 					raidPtr->raid_cinfo[r][c].ci_vp,
   2318 					&clabel);
   2319 				/* make sure status is noted */
   2320 				clabel.status =
   2321 					raidPtr->Disks[r][c].status;
   2322 				raidwrite_component_label(
   2323 					raidPtr->Disks[r][c].dev,
   2324 					raidPtr->raid_cinfo[r][c].ci_vp,
   2325 					&clabel);
   2326 				if (raidPtr->parity_good == RF_RAID_CLEAN) {
   2327 					raidmarkclean(
   2328 					      raidPtr->Disks[r][c].dev,
   2329 					      raidPtr->raid_cinfo[r][c].ci_vp,
   2330 					      raidPtr->mod_counter);
   2331 				}
   2332 			}
   2333 #endif
   2334 		}
   2335 	}
   2336 
   2337 	for( c = 0; c < raidPtr->numSpare ; c++) {
   2338 		sparecol = raidPtr->numCol + c;
   2339 		if (raidPtr->Disks[0][sparecol].status == rf_ds_used_spare) {
   2340 			/*
   2341 
   2342 			   we claim this disk is "optimal" if it's
   2343 			   rf_ds_used_spare, as that means it should be
   2344 			   directly substitutable for the disk it replaced.
   2345 			   We note that too...
   2346 
   2347 			 */
   2348 
   2349 			for(i=0;i<raidPtr->numRow;i++) {
   2350 				for(j=0;j<raidPtr->numCol;j++) {
   2351 					if ((raidPtr->Disks[i][j].spareRow ==
   2352 					     0) &&
   2353 					    (raidPtr->Disks[i][j].spareCol ==
   2354 					     sparecol)) {
   2355 						srow = i;
   2356 						scol = j;
   2357 						break;
   2358 					}
   2359 				}
   2360 			}
   2361 
   2362 			raidread_component_label(
   2363 				      raidPtr->Disks[0][sparecol].dev,
   2364 				      raidPtr->raid_cinfo[0][sparecol].ci_vp,
   2365 				      &clabel);
   2366 			/* make sure status is noted */
   2367 			clabel.version = RF_COMPONENT_LABEL_VERSION;
   2368 			clabel.mod_counter = raidPtr->mod_counter;
   2369 			clabel.serial_number = raidPtr->serial_number;
   2370 			clabel.row = srow;
   2371 			clabel.column = scol;
   2372 			clabel.num_rows = raidPtr->numRow;
   2373 			clabel.num_columns = raidPtr->numCol;
   2374 			clabel.clean = RF_RAID_DIRTY; /* changed in a bit*/
   2375 			clabel.status = rf_ds_optimal;
   2376 			raidwrite_component_label(
   2377 				      raidPtr->Disks[0][sparecol].dev,
   2378 				      raidPtr->raid_cinfo[0][sparecol].ci_vp,
   2379 				      &clabel);
   2380 			if (raidPtr->parity_good == RF_RAID_CLEAN) {
   2381 				raidmarkclean( raidPtr->Disks[0][sparecol].dev,
   2382 			              raidPtr->raid_cinfo[0][sparecol].ci_vp,
   2383 					       raidPtr->mod_counter);
   2384 			}
   2385 		}
   2386 	}
   2387 	/* 	printf("Component labels updated\n"); */
   2388 }
   2389 
   2390 void
   2391 rf_ReconThread(req)
   2392 	struct rf_recon_req *req;
   2393 {
   2394 	int     s;
   2395 	RF_Raid_t *raidPtr;
   2396 
   2397 	s = splbio();
   2398 	raidPtr = (RF_Raid_t *) req->raidPtr;
   2399 	raidPtr->recon_in_progress = 1;
   2400 
   2401 	rf_FailDisk((RF_Raid_t *) req->raidPtr, req->row, req->col,
   2402 		    ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));
   2403 
   2404 	/* XXX get rid of this! we don't need it at all.. */
   2405 	RF_Free(req, sizeof(*req));
   2406 
   2407 	raidPtr->recon_in_progress = 0;
   2408 	splx(s);
   2409 
   2410 	/* That's all... */
   2411 	kthread_exit(0);        /* does not return */
   2412 }
   2413 
   2414 void
   2415 rf_RewriteParityThread(raidPtr)
   2416 	RF_Raid_t *raidPtr;
   2417 {
   2418 	int retcode;
   2419 	int s;
   2420 
   2421 	raidPtr->parity_rewrite_in_progress = 1;
   2422 	s = splbio();
   2423 	retcode = rf_RewriteParity(raidPtr);
   2424 	splx(s);
   2425 	if (retcode) {
   2426 		printf("raid%d: Error re-writing parity!\n",raidPtr->raidid);
   2427 	} else {
   2428 		/* set the clean bit!  If we shutdown correctly,
   2429 		   the clean bit on each component label will get
   2430 		   set */
   2431 		raidPtr->parity_good = RF_RAID_CLEAN;
   2432 	}
   2433 	raidPtr->parity_rewrite_in_progress = 0;
   2434 
   2435 	/* That's all... */
   2436 	kthread_exit(0);        /* does not return */
   2437 }
   2438 
   2439 
   2440 void
   2441 rf_CopybackThread(raidPtr)
   2442 	RF_Raid_t *raidPtr;
   2443 {
   2444 	int s;
   2445 
   2446 	raidPtr->copyback_in_progress = 1;
   2447 	s = splbio();
   2448 	rf_CopybackReconstructedData(raidPtr);
   2449 	splx(s);
   2450 	raidPtr->copyback_in_progress = 0;
   2451 
   2452 	/* That's all... */
   2453 	kthread_exit(0);        /* does not return */
   2454 }
   2455 
   2456 
   2457 void
   2458 rf_ReconstructInPlaceThread(req)
   2459 	struct rf_recon_req *req;
   2460 {
   2461 	int retcode;
   2462 	int s;
   2463 	RF_Raid_t *raidPtr;
   2464 
   2465 	s = splbio();
   2466 	raidPtr = req->raidPtr;
   2467 	raidPtr->recon_in_progress = 1;
   2468 	retcode = rf_ReconstructInPlace(raidPtr, req->row, req->col);
   2469 	RF_Free(req, sizeof(*req));
   2470 	raidPtr->recon_in_progress = 0;
   2471 	splx(s);
   2472 
   2473 	/* That's all... */
   2474 	kthread_exit(0);        /* does not return */
   2475 }
   2476 
   2477 void
   2478 rf_mountroot_hook(dev)
   2479 	struct device *dev;
   2480 {
   2481 #if 1
   2482 	printf("rf_mountroot_hook called for %s\n",dev->dv_xname);
   2483 #endif
   2484 	if (boothowto & RB_ASKNAME) {
   2485 		/* We don't auto-config... */
   2486 	} else {
   2487 		/* They didn't ask, and we found something bootable... */
   2488 		/* XXX pretend for now.. */
   2489 if (raidautoconfig) {
   2490 		rootspec = raid_rooty;
   2491 }
   2492 	}
   2493 }
   2494 
   2495 
   2496 RF_AutoConfig_t *
   2497 rf_find_raid_components()
   2498 {
   2499 	struct devnametobdevmaj *dtobdm;
   2500 	struct vnode *vp;
   2501 	struct disklabel label;
   2502 	struct device *dv;
   2503 	char *cd_name;
   2504 	dev_t dev;
   2505 	int error;
   2506 	int i;
   2507 	int good_one;
   2508 	RF_ComponentLabel_t *clabel;
   2509 	RF_AutoConfig_t *ac_list;
   2510 	RF_AutoConfig_t *ac;
   2511 
   2512 
   2513 	/* initialize the AutoConfig list */
   2514 	ac_list = NULL;
   2515 
   2516 if (raidautoconfig) {
   2517 
   2518 	/* we begin by trolling through *all* the devices on the system */
   2519 
   2520 	for (dv = alldevs.tqh_first; dv != NULL;
   2521 	     dv = dv->dv_list.tqe_next) {
   2522 
   2523 		/* we are only interested in disks... */
   2524 		if (dv->dv_class != DV_DISK)
   2525 			continue;
   2526 
   2527 		/* we don't care about floppies... */
   2528 		if (!strcmp(dv->dv_cfdata->cf_driver->cd_name,"fd")) {
   2529 			continue;
   2530 		}
   2531 
   2532 		/* need to find the device_name_to_block_device_major stuff */
   2533 		cd_name = dv->dv_cfdata->cf_driver->cd_name;
   2534 		dtobdm = dev_name2blk;
   2535 		while (dtobdm->d_name && strcmp(dtobdm->d_name, cd_name)) {
   2536 			dtobdm++;
   2537 		}
   2538 
   2539 		/* get a vnode for the raw partition of this disk */
   2540 
   2541 		dev = MAKEDISKDEV(dtobdm->d_maj, dv->dv_unit, RAW_PART);
   2542 		if (bdevvp(dev, &vp))
   2543 			panic("RAID can't alloc vnode");
   2544 
   2545 		error = VOP_OPEN(vp, FREAD, NOCRED, 0);
   2546 
   2547 		if (error) {
   2548 			/* "Who cares."  Continue looking
   2549 			   for something that exists*/
   2550 			vput(vp);
   2551 			continue;
   2552 		}
   2553 
   2554 		/* Ok, the disk exists.  Go get the disklabel. */
   2555 		error = VOP_IOCTL(vp, DIOCGDINFO, (caddr_t)&label,
   2556 				  FREAD, NOCRED, 0);
   2557 		if (error) {
   2558 			/*
   2559 			 * XXX can't happen - open() would
   2560 			 * have errored out (or faked up one)
   2561 			 */
   2562 			printf("can't get label for dev %s%c (%d)!?!?\n",
   2563 			       dv->dv_xname, 'a' + RAW_PART, error);
   2564 		}
   2565 
   2566 		/* don't need this any more.  We'll allocate it again
   2567 		   a little later if we really do... */
   2568 		VOP_CLOSE(vp, FREAD, NOCRED, 0);
   2569 		vput(vp);
   2570 
   2571 		for (i=0; i < label.d_npartitions; i++) {
   2572 			/* We only support partitions marked as RAID */
   2573 			if (label.d_partitions[i].p_fstype != FS_RAID)
   2574 				continue;
   2575 
   2576 			dev = MAKEDISKDEV(dtobdm->d_maj, dv->dv_unit, i);
   2577 			if (bdevvp(dev, &vp))
   2578 				panic("RAID can't alloc vnode");
   2579 
   2580 			error = VOP_OPEN(vp, FREAD, NOCRED, 0);
   2581 			if (error) {
   2582 				/* Whatever... */
   2583 				vput(vp);
   2584 				continue;
   2585 			}
   2586 
   2587 			good_one = 0;
   2588 
   2589 			clabel = (RF_ComponentLabel_t *)
   2590 				malloc(sizeof(RF_ComponentLabel_t),
   2591 				       M_RAIDFRAME, M_NOWAIT);
   2592 			if (clabel == NULL) {
   2593 				/* XXX CLEANUP HERE */
   2594 				printf("RAID auto config: out of memory!\n");
   2595 				return(NULL); /* XXX probably should panic? */
   2596 			}
   2597 
   2598 			if (!raidread_component_label(dev, vp, clabel)) {
   2599 				/* Got the label.  Does it look reasonable? */
   2600 				if (rf_reasonable_label(clabel) &&
   2601 				    (clabel->partitionSize <=
   2602 				     label.d_partitions[i].p_size)) {
   2603 #if DEBUG
   2604 					printf("Component on: %s%c: %d\n",
   2605 					       dv->dv_xname, 'a'+i,
   2606 					       label.d_partitions[i].p_size);
   2607 					print_component_label(clabel);
   2608 #endif
   2609 					/* if it's reasonable, add it,
   2610 					   else ignore it. */
   2611 					ac = (RF_AutoConfig_t *)
   2612 						malloc(sizeof(RF_AutoConfig_t),
   2613 						       M_RAIDFRAME,
   2614 						       M_NOWAIT);
   2615 					if (ac == NULL) {
   2616 						/* XXX should panic?? */
   2617 						return(NULL);
   2618 					}
   2619 
   2620 					sprintf(ac->devname, "%s%c",
   2621 						dv->dv_xname, 'a'+i);
   2622 					ac->dev = dev;
   2623 					ac->vp = vp;
   2624 					ac->clabel = clabel;
   2625 					ac->next = ac_list;
   2626 					ac_list = ac;
   2627 					good_one = 1;
   2628 				}
   2629 			}
   2630 			if (!good_one) {
   2631 				/* cleanup */
   2632 				free(clabel, M_RAIDFRAME);
   2633 				VOP_CLOSE(vp, FREAD, NOCRED, 0);
   2634 				vput(vp);
   2635 			}
   2636 		}
   2637 	}
   2638 }
   2639 return(ac_list);
   2640 }
   2641 
   2642 static int
   2643 rf_reasonable_label(clabel)
   2644 	RF_ComponentLabel_t *clabel;
   2645 {
   2646 
   2647 	if (((clabel->version==RF_COMPONENT_LABEL_VERSION_1) ||
   2648 	     (clabel->version==RF_COMPONENT_LABEL_VERSION)) &&
   2649 	    ((clabel->clean == RF_RAID_CLEAN) ||
   2650 	     (clabel->clean == RF_RAID_DIRTY)) &&
   2651 	    clabel->row >=0 &&
   2652 	    clabel->column >= 0 &&
   2653 	    clabel->num_rows > 0 &&
   2654 	    clabel->num_columns > 0 &&
   2655 	    clabel->row < clabel->num_rows &&
   2656 	    clabel->column < clabel->num_columns &&
   2657 	    clabel->blockSize > 0 &&
   2658 	    clabel->numBlocks > 0) {
   2659 		/* label looks reasonable enough... */
   2660 		return(1);
   2661 	}
   2662 	return(0);
   2663 }
   2664 
   2665 
   2666 void
   2667 print_component_label(clabel)
   2668 	RF_ComponentLabel_t *clabel;
   2669 {
   2670 	printf("   Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
   2671 	       clabel->row, clabel->column,
   2672 	       clabel->num_rows, clabel->num_columns);
   2673 	printf("   Version: %d Serial Number: %d Mod Counter: %d\n",
   2674 	       clabel->version, clabel->serial_number,
   2675 	       clabel->mod_counter);
   2676 	printf("   Clean: %s Status: %d\n",
   2677 	       clabel->clean ? "Yes" : "No", clabel->status );
   2678 	printf("   sectPerSU: %d SUsPerPU: %d SUsPerRU: %d\n",
   2679 	       clabel->sectPerSU, clabel->SUsPerPU, clabel->SUsPerRU);
   2680 	printf("   RAID Level: %c  blocksize: %d numBlocks: %d\n",
   2681 	       (char) clabel->parityConfig, clabel->blockSize,
   2682 	       clabel->numBlocks);
   2683 	printf("   Autoconfig: %s\n", clabel->autoconfigure ? "Yes" : "No" );
   2684 	printf("   Last configured as: raid%d\n", clabel->last_unit );
   2685 #if 0
   2686 	   printf("   Config order: %d\n", clabel->config_order);
   2687 #endif
   2688 
   2689 }
   2690 
   2691 RF_ConfigSet_t *
   2692 rf_create_auto_sets(ac_list)
   2693 	RF_AutoConfig_t *ac_list;
   2694 {
   2695 	RF_AutoConfig_t *ac;
   2696 	RF_ConfigSet_t *config_sets;
   2697 	RF_ConfigSet_t *cset;
   2698 	RF_AutoConfig_t *ac_next;
   2699 
   2700 
   2701 	config_sets = NULL;
   2702 
   2703 	/* Go through the AutoConfig list, and figure out which components
   2704 	   belong to what sets.  */
   2705 	ac = ac_list;
   2706 	while(ac!=NULL) {
   2707 		/* we're going to putz with ac->next, so save it here
   2708 		   for use at the end of the loop */
   2709 		ac_next = ac->next;
   2710 
   2711 		if (config_sets == NULL) {
   2712 			/* will need at least this one... */
   2713 			config_sets = (RF_ConfigSet_t *)
   2714 				malloc(sizeof(RF_ConfigSet_t),
   2715 				       M_RAIDFRAME, M_NOWAIT);
   2716 			if (config_sets == NULL) {
   2717 				panic("rf_create_auto_sets: No memory!\n");
   2718 			}
   2719 			/* this one is easy :) */
   2720 			config_sets->ac = ac;
   2721 			config_sets->next = NULL;
   2722 			config_sets->rootable = 0;
   2723 			ac->next = NULL;
   2724 		} else {
   2725 			/* which set does this component fit into? */
   2726 			cset = config_sets;
   2727 			while(cset!=NULL) {
   2728 				if (rf_does_it_fit(cset, ac)) {
   2729 					/* looks like it matches */
   2730 					ac->next = cset->ac;
   2731 					cset->ac = ac;
   2732 					break;
   2733 				}
   2734 				cset = cset->next;
   2735 			}
   2736 			if (cset==NULL) {
   2737 				/* didn't find a match above... new set..*/
   2738 				cset = (RF_ConfigSet_t *)
   2739 					malloc(sizeof(RF_ConfigSet_t),
   2740 					       M_RAIDFRAME, M_NOWAIT);
   2741 				if (cset == NULL) {
   2742 					panic("rf_create_auto_sets: No memory!\n");
   2743 				}
   2744 				cset->ac = ac;
   2745 				ac->next = NULL;
   2746 				cset->next = config_sets;
   2747 				cset->rootable = 0;
   2748 				config_sets = cset;
   2749 			}
   2750 		}
   2751 		ac = ac_next;
   2752 	}
   2753 
   2754 
   2755 	return(config_sets);
   2756 }
   2757 
   2758 static int
   2759 rf_does_it_fit(cset, ac)
   2760 	RF_ConfigSet_t *cset;
   2761 	RF_AutoConfig_t *ac;
   2762 {
   2763 	RF_ComponentLabel_t *clabel1, *clabel2;
   2764 
   2765 	/* If this one matches the *first* one in the set, that's good
   2766 	   enough, since the other members of the set would have been
   2767 	   through here too... */
   2768 	/* note that we are not checking partitionSize here.. */
   2769 
   2770 	clabel1 = cset->ac->clabel;
   2771 	clabel2 = ac->clabel;
   2772 	if ((clabel1->version == clabel2->version) &&
   2773 	    (clabel1->serial_number == clabel2->serial_number) &&
   2774 	    (clabel1->mod_counter == clabel2->mod_counter) &&
   2775 	    (clabel1->num_rows == clabel2->num_rows) &&
   2776 	    (clabel1->num_columns == clabel2->num_columns) &&
   2777 	    (clabel1->sectPerSU == clabel2->sectPerSU) &&
   2778 	    (clabel1->SUsPerPU == clabel2->SUsPerPU) &&
   2779 	    (clabel1->SUsPerRU == clabel2->SUsPerRU) &&
   2780 	    (clabel1->parityConfig == clabel2->parityConfig) &&
   2781 	    (clabel1->maxOutstanding == clabel2->maxOutstanding) &&
   2782 	    (clabel1->blockSize == clabel2->blockSize) &&
   2783 	    (clabel1->numBlocks == clabel2->numBlocks) &&
   2784 	    (clabel1->autoconfigure == clabel2->autoconfigure) &&
   2785 	    (clabel1->root_partition == clabel2->root_partition) &&
   2786 	    (clabel1->last_unit == clabel2->last_unit) &&
   2787 	    (clabel1->config_order == clabel2->config_order)) {
   2788 		/* if it get's here, it almost *has* to be a match */
   2789 	} else {
   2790 		/* it's not consistent with somebody in the set..
   2791 		   punt */
   2792 		return(0);
   2793 	}
   2794 	/* all was fine.. it must fit... */
   2795 	return(1);
   2796 }
   2797 
   2798 int
   2799 rf_have_enough_components(cset)
   2800 	RF_ConfigSet_t *cset;
   2801 {
   2802 	RF_AutoConfig_t *ac;
   2803 	RF_AutoConfig_t *auto_config;
   2804 	RF_ComponentLabel_t *clabel;
   2805 	int r,c;
   2806 	int num_rows;
   2807 	int num_cols;
   2808 	int num_missing;
   2809 
   2810 	/* check to see that we have enough 'live' components
   2811 	   of this set.  If so, we can configure it if necessary */
   2812 
   2813 	num_rows = cset->ac->clabel->num_rows;
   2814 	num_cols = cset->ac->clabel->num_columns;
   2815 
   2816 	/* XXX Check for duplicate components!?!?!? */
   2817 
   2818 	num_missing = 0;
   2819 	auto_config = cset->ac;
   2820 
   2821 	for(r=0; r<num_rows; r++) {
   2822 		for(c=0; c<num_cols; c++) {
   2823 			ac = auto_config;
   2824 			while(ac!=NULL) {
   2825 				if (ac->clabel==NULL) {
   2826 					/* big-time bad news. */
   2827 					goto fail;
   2828 				}
   2829 				if ((ac->clabel->row == r) &&
   2830 				    (ac->clabel->column == c)) {
   2831 					/* it's this one... */
   2832 #if DEBUG
   2833 					printf("Found: %s at %d,%d\n",
   2834 					       ac->devname,r,c);
   2835 #endif
   2836 					break;
   2837 				}
   2838 				ac=ac->next;
   2839 			}
   2840 			if (ac==NULL) {
   2841 				/* Didn't find one here! */
   2842 				num_missing++;
   2843 			}
   2844 		}
   2845 	}
   2846 
   2847 	clabel = cset->ac->clabel;
   2848 
   2849 	if (((clabel->parityConfig == '0') && (num_missing > 0)) ||
   2850 	    ((clabel->parityConfig == '1') && (num_missing > 1)) ||
   2851 	    ((clabel->parityConfig == '4') && (num_missing > 1)) ||
   2852 	    ((clabel->parityConfig == '5') && (num_missing > 1))) {
   2853 		/* XXX this needs to be made *much* more general */
   2854 		/* Too many failures */
   2855 		return(0);
   2856 	}
   2857 	/* otherwise, all is well, and we've got enough to take a kick
   2858 	   at autoconfiguring this set */
   2859 	return(1);
   2860 fail:
   2861 	return(0);
   2862 
   2863 }
   2864 
   2865 void
   2866 rf_create_configuration(ac,config,raidPtr)
   2867 	RF_AutoConfig_t *ac;
   2868 	RF_Config_t *config;
   2869 	RF_Raid_t *raidPtr;
   2870 {
   2871 	RF_ComponentLabel_t *clabel;
   2872 
   2873 	clabel = ac->clabel;
   2874 
   2875 	/* 1. Fill in the common stuff */
   2876 	config->numRow = clabel->num_rows;
   2877 	config->numCol = clabel->num_columns;
   2878 	config->numSpare = 0; /* XXX should this be set here? */
   2879 	config->sectPerSU = clabel->sectPerSU;
   2880 	config->SUsPerPU = clabel->SUsPerPU;
   2881 	config->SUsPerRU = clabel->SUsPerRU;
   2882 	config->parityConfig = clabel->parityConfig;
   2883 	/* XXX... */
   2884 	strcpy(config->diskQueueType,"fifo");
   2885 	config->maxOutstandingDiskReqs = clabel->maxOutstanding;
   2886 	config->layoutSpecificSize = 0; /* XXX ?? */
   2887 
   2888 	while(ac!=NULL) {
   2889 		/* row/col values will be in range due to the checks
   2890 		   in reasonable_label() */
   2891 		strcpy(config->devnames[ac->clabel->row][ac->clabel->column],
   2892 		       ac->devname);
   2893 		ac = ac->next;
   2894 	}
   2895 
   2896 }
   2897 
   2898 int
   2899 rf_set_autoconfig(raidPtr, new_value)
   2900 	RF_Raid_t *raidPtr;
   2901 	int new_value;
   2902 {
   2903 	RF_ComponentLabel_t clabel;
   2904 	struct vnode *vp;
   2905 	dev_t dev;
   2906 	int row, column;
   2907 
   2908 	raidPtr->autoconfigure = new_value;
   2909 	for(row=0; row<raidPtr->numRow; row++) {
   2910 		for(column=0; column<raidPtr->numCol; column++) {
   2911 			dev = raidPtr->Disks[row][column].dev;
   2912 			vp = raidPtr->raid_cinfo[row][column].ci_vp;
   2913 			raidread_component_label(dev, vp, &clabel);
   2914 			clabel.autoconfigure = new_value;
   2915 			raidwrite_component_label(dev, vp, &clabel);
   2916 		}
   2917 	}
   2918 	return(new_value);
   2919 }
   2920 
   2921 int
   2922 rf_set_rootpartition(raidPtr, new_value)
   2923 	RF_Raid_t *raidPtr;
   2924 	int new_value;
   2925 {
   2926 	RF_ComponentLabel_t clabel;
   2927 	struct vnode *vp;
   2928 	dev_t dev;
   2929 	int row, column;
   2930 
   2931 	raidPtr->root_partition = new_value;
   2932 	for(row=0; row<raidPtr->numRow; row++) {
   2933 		for(column=0; column<raidPtr->numCol; column++) {
   2934 			dev = raidPtr->Disks[row][column].dev;
   2935 			vp = raidPtr->raid_cinfo[row][column].ci_vp;
   2936 			raidread_component_label(dev, vp, &clabel);
   2937 			clabel.root_partition = new_value;
   2938 			raidwrite_component_label(dev, vp, &clabel);
   2939 		}
   2940 	}
   2941 	return(new_value);
   2942 }
   2943 
   2944 void
   2945 rf_release_all_vps(cset)
   2946 	RF_ConfigSet_t *cset;
   2947 {
   2948 	RF_AutoConfig_t *ac;
   2949 
   2950 	ac = cset->ac;
   2951 	while(ac!=NULL) {
   2952 		/* Close the vp, and give it back */
   2953 		if (ac->vp) {
   2954 			VOP_CLOSE(ac->vp, FREAD, NOCRED, 0);
   2955 			vput(ac->vp);
   2956 		}
   2957 		ac = ac->next;
   2958 	}
   2959 }
   2960 
   2961 
   2962 void
   2963 rf_cleanup_config_set(cset)
   2964 	RF_ConfigSet_t *cset;
   2965 {
   2966 	RF_AutoConfig_t *ac;
   2967 	RF_AutoConfig_t *next_ac;
   2968 
   2969 	ac = cset->ac;
   2970 	while(ac!=NULL) {
   2971 		next_ac = ac->next;
   2972 		/* nuke the label */
   2973 		free(ac->clabel, M_RAIDFRAME);
   2974 		/* cleanup the config structure */
   2975 		free(ac, M_RAIDFRAME);
   2976 		/* "next.." */
   2977 		ac = next_ac;
   2978 	}
   2979 	/* and, finally, nuke the config set */
   2980 	free(cset, M_RAIDFRAME);
   2981 }
   2982 
   2983 
   2984 void
   2985 raid_init_component_label(raidPtr, clabel)
   2986 	RF_Raid_t *raidPtr;
   2987 	RF_ComponentLabel_t *clabel;
   2988 {
   2989 	/* current version number */
   2990 	clabel->version = RF_COMPONENT_LABEL_VERSION;
   2991 	clabel->serial_number = clabel->serial_number;
   2992 	clabel->mod_counter = raidPtr->mod_counter;
   2993 	clabel->num_rows = raidPtr->numRow;
   2994 	clabel->num_columns = raidPtr->numCol;
   2995 	clabel->clean = RF_RAID_DIRTY; /* not clean */
   2996 	clabel->status = rf_ds_optimal; /* "It's good!" */
   2997 
   2998 	clabel->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit;
   2999 	clabel->SUsPerPU = raidPtr->Layout.SUsPerPU;
   3000 	clabel->SUsPerRU = raidPtr->Layout.SUsPerRU;
   3001 
   3002 	clabel->blockSize = raidPtr->bytesPerSector;
   3003 	clabel->numBlocks = raidPtr->sectorsPerDisk;
   3004 
   3005 	/* XXX not portable */
   3006 	clabel->parityConfig = raidPtr->Layout.map->parityConfig;
   3007 	clabel->maxOutstanding = raidPtr->maxOutstanding;
   3008 	clabel->autoconfigure = raidPtr->autoconfigure;
   3009 	clabel->root_partition = raidPtr->root_partition;
   3010 	clabel->last_unit = raidPtr->raidid;
   3011 	clabel->config_order = raidPtr->config_order;
   3012 }
   3013 
   3014 int
   3015 rf_auto_config_set(cset,unit)
   3016 	RF_ConfigSet_t *cset;
   3017 	int *unit;
   3018 {
   3019 	RF_Raid_t *raidPtr;
   3020 	RF_Config_t *config;
   3021 	int raidID;
   3022 	int retcode;
   3023 
   3024 	printf("Starting autoconfigure on raid%d\n",raidID);
   3025 
   3026 	retcode = 0;
   3027 	*unit = -1;
   3028 
   3029 	/* 1. Create a config structure */
   3030 
   3031 	config = (RF_Config_t *)malloc(sizeof(RF_Config_t),
   3032 				       M_RAIDFRAME,
   3033 				       M_NOWAIT);
   3034 	if (config==NULL) {
   3035 		printf("Out of mem!?!?\n");
   3036 				/* XXX do something more intelligent here. */
   3037 		return(1);
   3038 	}
   3039 	/* XXX raidID needs to be set correctly.. */
   3040 
   3041 	/*
   3042 	   2. Figure out what RAID ID this one is supposed to live at
   3043 	   See if we can get the same RAID dev that it was configured
   3044 	   on last time..
   3045 	*/
   3046 
   3047 	raidID = cset->ac->clabel->last_unit;
   3048 	if ((raidID < 0) || (raidID >= numraid)) {
   3049 		/* let's not wander off into lala land. */
   3050 		raidID = numraid - 1;
   3051 	}
   3052 	if (raidPtrs[raidID]->valid != 0) {
   3053 
   3054 		/*
   3055 		   Nope... Go looking for an alternative...
   3056 		   Start high so we don't immediately use raid0 if that's
   3057 		   not taken.
   3058 		*/
   3059 
   3060 		for(raidID = numraid; raidID >= 0; raidID--) {
   3061 			if (raidPtrs[raidID]->valid == 0) {
   3062 				/* can use this one! */
   3063 				break;
   3064 			}
   3065 		}
   3066 	}
   3067 
   3068 	if (raidID < 0) {
   3069 		/* punt... */
   3070 		printf("Unable to auto configure this set!\n");
   3071 		printf("(Out of RAID devs!)\n");
   3072 		return(1);
   3073 	}
   3074 
   3075 	raidPtr = raidPtrs[raidID];
   3076 
   3077 	/* XXX all this stuff should be done SOMEWHERE ELSE! */
   3078 	raidPtr->raidid = raidID;
   3079 	raidPtr->openings = RAIDOUTSTANDING;
   3080 
   3081 	/* 3. Build the configuration structure */
   3082 	rf_create_configuration(cset->ac, config, raidPtr);
   3083 
   3084 	/* 4. Do the configuration */
   3085 	retcode = rf_Configure(raidPtr, config, cset->ac);
   3086 
   3087 	if (retcode == 0) {
   3088 #if DEBUG
   3089 		printf("Calling raidinit()\n");
   3090 #endif
   3091 				/* XXX the 0 below is bogus! */
   3092 		retcode = raidinit(0, raidPtrs[raidID], raidID);
   3093 		if (retcode) {
   3094 			printf("init returned: %d\n",retcode);
   3095 		}
   3096 		rf_markalldirty( raidPtrs[raidID] );
   3097 		raidPtrs[raidID]->autoconfigure = 1; /* XXX do this here? */
   3098 		if (cset->ac->clabel->root_partition==1) {
   3099 			/* everything configured just fine.  Make a note
   3100 			   that this set is eligible to be root. */
   3101 			cset->rootable = 1;
   3102 			/* XXX do this here? */
   3103 			raidPtrs[raidID]->root_partition = 1;
   3104 		}
   3105 	}
   3106 
   3107 	/* 5. Cleanup */
   3108 	free(config, M_RAIDFRAME);
   3109 
   3110 	*unit = raidID;
   3111 	return(retcode);
   3112 }
   3113