Home | History | Annotate | Line # | Download | only in raidframe
rf_netbsdkintf.c revision 1.61
      1 /*	$NetBSD: rf_netbsdkintf.c,v 1.61 2000/02/25 20:11:00 oster Exp $	*/
      2 /*-
      3  * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc.
      4  * All rights reserved.
      5  *
      6  * This code is derived from software contributed to The NetBSD Foundation
      7  * by Greg Oster; Jason R. Thorpe.
      8  *
      9  * Redistribution and use in source and binary forms, with or without
     10  * modification, are permitted provided that the following conditions
     11  * are met:
     12  * 1. Redistributions of source code must retain the above copyright
     13  *    notice, this list of conditions and the following disclaimer.
     14  * 2. Redistributions in binary form must reproduce the above copyright
     15  *    notice, this list of conditions and the following disclaimer in the
     16  *    documentation and/or other materials provided with the distribution.
     17  * 3. All advertising materials mentioning features or use of this software
     18  *    must display the following acknowledgement:
     19  *        This product includes software developed by the NetBSD
     20  *        Foundation, Inc. and its contributors.
     21  * 4. Neither the name of The NetBSD Foundation nor the names of its
     22  *    contributors may be used to endorse or promote products derived
     23  *    from this software without specific prior written permission.
     24  *
     25  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     26  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     27  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     28  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     29  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     30  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     31  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     32  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     33  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     34  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     35  * POSSIBILITY OF SUCH DAMAGE.
     36  */
     37 
     38 /*
     39  * Copyright (c) 1988 University of Utah.
     40  * Copyright (c) 1990, 1993
     41  *      The Regents of the University of California.  All rights reserved.
     42  *
     43  * This code is derived from software contributed to Berkeley by
     44  * the Systems Programming Group of the University of Utah Computer
     45  * Science Department.
     46  *
     47  * Redistribution and use in source and binary forms, with or without
     48  * modification, are permitted provided that the following conditions
     49  * are met:
     50  * 1. Redistributions of source code must retain the above copyright
     51  *    notice, this list of conditions and the following disclaimer.
     52  * 2. Redistributions in binary form must reproduce the above copyright
     53  *    notice, this list of conditions and the following disclaimer in the
     54  *    documentation and/or other materials provided with the distribution.
     55  * 3. All advertising materials mentioning features or use of this software
     56  *    must display the following acknowledgement:
     57  *      This product includes software developed by the University of
     58  *      California, Berkeley and its contributors.
     59  * 4. Neither the name of the University nor the names of its contributors
     60  *    may be used to endorse or promote products derived from this software
     61  *    without specific prior written permission.
     62  *
     63  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     64  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     65  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     66  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     67  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     68  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     69  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     70  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     71  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     72  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     73  * SUCH DAMAGE.
     74  *
     75  * from: Utah $Hdr: cd.c 1.6 90/11/28$
     76  *
     77  *      @(#)cd.c        8.2 (Berkeley) 11/16/93
     78  */
     79 
     80 
     81 
     82 
     83 /*
     84  * Copyright (c) 1995 Carnegie-Mellon University.
     85  * All rights reserved.
     86  *
     87  * Authors: Mark Holland, Jim Zelenka
     88  *
     89  * Permission to use, copy, modify and distribute this software and
     90  * its documentation is hereby granted, provided that both the copyright
     91  * notice and this permission notice appear in all copies of the
     92  * software, derivative works or modified versions, and any portions
     93  * thereof, and that both notices appear in supporting documentation.
     94  *
     95  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
     96  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
     97  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
     98  *
     99  * Carnegie Mellon requests users of this software to return to
    100  *
    101  *  Software Distribution Coordinator  or  Software.Distribution (at) CS.CMU.EDU
    102  *  School of Computer Science
    103  *  Carnegie Mellon University
    104  *  Pittsburgh PA 15213-3890
    105  *
    106  * any improvements or extensions that they make and grant Carnegie the
    107  * rights to redistribute these changes.
    108  */
    109 
    110 /***********************************************************
    111  *
    112  * rf_kintf.c -- the kernel interface routines for RAIDframe
    113  *
    114  ***********************************************************/
    115 
    116 #include <sys/errno.h>
    117 #include <sys/param.h>
    118 #include <sys/pool.h>
    119 #include <sys/queue.h>
    120 #include <sys/disk.h>
    121 #include <sys/device.h>
    122 #include <sys/stat.h>
    123 #include <sys/ioctl.h>
    124 #include <sys/fcntl.h>
    125 #include <sys/systm.h>
    126 #include <sys/namei.h>
    127 #include <sys/vnode.h>
    128 #include <sys/param.h>
    129 #include <sys/types.h>
    130 #include <machine/types.h>
    131 #include <sys/disklabel.h>
    132 #include <sys/conf.h>
    133 #include <sys/lock.h>
    134 #include <sys/buf.h>
    135 #include <sys/user.h>
    136 
    137 #include "raid.h"
    138 #include "rf_raid.h"
    139 #include "rf_raidframe.h"
    140 #include "rf_copyback.h"
    141 #include "rf_dag.h"
    142 #include "rf_dagflags.h"
    143 #include "rf_diskqueue.h"
    144 #include "rf_acctrace.h"
    145 #include "rf_etimer.h"
    146 #include "rf_general.h"
    147 #include "rf_debugMem.h"
    148 #include "rf_kintf.h"
    149 #include "rf_options.h"
    150 #include "rf_driver.h"
    151 #include "rf_parityscan.h"
    152 #include "rf_debugprint.h"
    153 #include "rf_threadstuff.h"
    154 
/* Debug verbosity for db1_printf(); messages are printed only when > 0. */
int     rf_kdebug_level = 0;

/*
 * db1_printf((fmt, args...)) -- conditional debug printf.
 *
 * The argument is a complete, parenthesized printf argument list, hence
 * the doubled parentheses at every call site.  Both variants expand to a
 * single do { } while (0) statement so that the macro (followed by the
 * caller's semicolon) can be used safely as the body of an un-braced
 * if/else: the bare "if (...) printf a" form would capture a following
 * "else", and the "{ }" form followed by ";" would not compile there.
 */
#ifdef DEBUG
#define db1_printf(a) do { if (rf_kdebug_level > 0) printf a; } while (0)
#else				/* DEBUG */
#define db1_printf(a) do { } while (0)
#endif				/* DEBUG */
    162 
/*
 * Per-unit RAID descriptors, indexed by unit number.  The array and each
 * descriptor are allocated in raidattach().
 */
static RF_Raid_t **raidPtrs;	/* global raid device descriptors */

/*
 * Mutex initialized in raidattach() with rf_mutex_init().
 * NOTE(review): presumably guards the two spare-table queues below --
 * confirm against the code that uses them.
 */
RF_DECLARE_STATIC_MUTEX(rf_sparet_wait_mutex)

/* Both queues are reset to NULL (empty) in raidattach(). */
static RF_SparetWait_t *rf_sparet_wait_queue;	/* requests to install a
						 * spare table */
static RF_SparetWait_t *rf_sparet_resp_queue;	/* responses from
						 * installation process */

    171 
/* prototypes */

/* Helpers private to this file. */
static void KernelWakeupFunc(struct buf * bp);
static void InitBP(struct buf * bp, struct vnode *, unsigned rw_flag,
		   dev_t dev, RF_SectorNum_t startSect,
		   RF_SectorCount_t numSect, caddr_t buf,
		   void (*cbFunc) (struct buf *), void *cbArg,
		   int logBytesPerSector, struct proc * b_proc);
static void raidinit __P((RF_Raid_t *));

/*
 * Driver entry points: attach, size, open/close, ioctl, read/write,
 * strategy and dump.  These have external linkage.
 */
void raidattach __P((int));
int raidsize __P((dev_t));
int raidopen __P((dev_t, int, int, struct proc *));
int raidclose __P((dev_t, int, int, struct proc *));
int raidioctl __P((dev_t, u_long, caddr_t, int, struct proc *));
int raidwrite __P((dev_t, struct uio *, int));
int raidread __P((dev_t, struct uio *, int));
void raidstrategy __P((struct buf *));
int raiddump __P((dev_t, daddr_t, caddr_t, size_t));

    190 
/*
 * Pilfered from ccd.c
 *
 * Wrapper around a struct buf that records which original buffer and
 * which RAIDframe disk-queue request it belongs to.
 */

struct raidbuf {
	struct buf rf_buf;	/* new I/O buf.  MUST BE FIRST!!! */
	struct buf *rf_obp;	/* ptr. to original I/O buf */
	int     rf_flags;	/* misc. flags */
	RF_DiskQueueData_t *req;/* the request that this was part of.. */
};


/*
 * Get/return an item from the unit's sc_cbufpool.
 * NOTE(review): RAIDGETBUF passes PR_NOWAIT, so it presumably can return
 * NULL instead of sleeping -- confirm against pool(9) and check callers.
 */
#define RAIDGETBUF(rs) pool_get(&(rs)->sc_cbufpool, PR_NOWAIT)
#define	RAIDPUTBUF(rs, cbp) pool_put(&(rs)->sc_cbufpool, cbp)
    205 
/* XXX Not sure if the following should be replacing the raidPtrs above,
   or if it should be used in conjunction with that...
*/

/*
 * Per-unit driver state.  One entry per unit lives in the raid_softc[]
 * array allocated by raidattach(), indexed by unit number in parallel
 * with raidPtrs[].
 */
struct raid_softc {
	int     sc_flags;	/* flags */
	int     sc_cflags;	/* configuration flags */
	size_t  sc_size;        /* size of the raid device */
	char    sc_xname[20];	/* XXX external name */
	struct disk sc_dkdev;	/* generic disk device info */
	struct pool sc_cbufpool;	/* component buffer pool */
	struct buf_queue buf_queue;	/* used for the device queue */
};
/* sc_flags */
#define RAIDF_INITED	0x01	/* unit has been initialized */
#define RAIDF_WLABEL	0x02	/* label area is writable */
#define RAIDF_LABELLING	0x04	/* unit is currently being labelled */
#define RAIDF_WANTED	0x40	/* someone is waiting to obtain a lock */
#define RAIDF_LOCKED	0x80	/* unit is locked */
    225 
/* Unit number of a RAID dev_t (thin wrapper around DISKUNIT()). */
#define	raidunit(x)	DISKUNIT(x)
/* Number of usable units; set by raidattach() and checked by every entry point. */
int numraid = 0;

/*
 * Allow RAIDOUTSTANDING number of simultaneous I/Os to this RAID device.
 * Be aware that large numbers can allow the driver to consume a lot of
 * kernel memory, especially on writes, and in degraded mode reads.
 *
 * For example: with a stripe width of 64 blocks (32k) and 5 disks,
 * a single 64K write will typically require 64K for the old data,
 * 64K for the old parity, and 64K for the new parity, for a total
 * of 192K (if the parity buffer is not re-used immediately).
 * Even if it is re-used immediately, that's still 128K, which when
 * multiplied by say 10 requests, is 1280K, *on top* of the 640K of
 * incoming data.
 *
 * Now in degraded mode, for example, a 64K read on the above setup may
 * require data reconstruction, which will require *all* of the 4 remaining
 * disks to participate -- 4 * 32K/disk == 128K again.
 */

#ifndef RAIDOUTSTANDING
#define RAIDOUTSTANDING   6
#endif

/* dev_t naming the RAW_PART partition of the same major/unit as dev. */
#define RAIDLABELDEV(dev)	\
	(MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))
    252 
/* declared here, and made public, for the benefit of KVM stuff.. */
/* Array of numraid per-unit softc structures, allocated in raidattach(). */
struct raid_softc *raid_softc;

/* Disklabel helpers. */
static void raidgetdefaultlabel __P((RF_Raid_t *, struct raid_softc *,
				     struct disklabel *));
static void raidgetdisklabel __P((dev_t));
static void raidmakedisklabel __P((struct raid_softc *));

/*
 * Per-unit lock, taken around open/close and shutdown processing
 * (see raidopen(), raidclose() and raidioctl()).
 */
static int raidlock __P((struct raid_softc *));
static void raidunlock __P((struct raid_softc *));

static void rf_markalldirty __P((RF_Raid_t *));
void rf_mountroot_hook __P((struct device *));

/*
 * One struct device per unit, filled in by raidattach(); an entry may be
 * handed to booted_device by rf_buildroothack().
 */
struct device *raidrootdev;
struct cfdata cf_raidrootdev;
struct cfdriver cfdrv;
/* XXX these should be moved up */
#include "rf_configure.h"
#include <sys/reboot.h>

/* Entry points for long-running operations. */
void rf_ReconThread __P((struct rf_recon_req *));
/* XXX what I want is: */
/*void rf_ReconThread __P((RF_Raid_t *raidPtr));  */
void rf_RewriteParityThread __P((RF_Raid_t *raidPtr));
void rf_CopybackThread __P((RF_Raid_t *raidPtr));
void rf_ReconstructInPlaceThread __P((struct rf_recon_req *));
void rf_buildroothack __P((void *));

/* Autoconfiguration support. */
RF_AutoConfig_t *rf_find_raid_components __P((void));
void print_component_label __P((RF_ComponentLabel_t *));
RF_ConfigSet_t *rf_create_auto_sets __P((RF_AutoConfig_t *));
static int rf_does_it_fit __P((RF_ConfigSet_t *,RF_AutoConfig_t *));
static int rf_reasonable_label __P((RF_ComponentLabel_t *));
void rf_create_configuration __P((RF_AutoConfig_t *,RF_Config_t *,
				  RF_Raid_t *));
int rf_set_autoconfig __P((RF_Raid_t *, int));
int rf_set_rootpartition __P((RF_Raid_t *, int));
void rf_release_all_vps __P((RF_ConfigSet_t *));
void rf_cleanup_config_set __P((RF_ConfigSet_t *));
int rf_have_enough_components __P((RF_ConfigSet_t *));
int rf_auto_config_set __P((RF_ConfigSet_t *, int *));

/*
 * When non-zero, raidattach() searches for RAID components and schedules
 * rf_buildroothack(); when zero (the default here) that step is skipped.
 */
static int raidautoconfig = 0; /* Debugging, mostly.  Set to 0 to not
				  allow autoconfig to take place */
extern struct device *booted_device;
    299 
    300 void
    301 raidattach(num)
    302 	int     num;
    303 {
    304 	int raidID;
    305 	int i, rc;
    306 	RF_AutoConfig_t *ac_list; /* autoconfig list */
    307 	RF_ConfigSet_t *config_sets;
    308 
    309 #ifdef DEBUG
    310 	printf("raidattach: Asked for %d units\n", num);
    311 #endif
    312 
    313 	if (num <= 0) {
    314 #ifdef DIAGNOSTIC
    315 		panic("raidattach: count <= 0");
    316 #endif
    317 		return;
    318 	}
    319 	/* This is where all the initialization stuff gets done. */
    320 
    321 	numraid = num;
    322 
    323 	/* Make some space for requested number of units... */
    324 
    325 	RF_Calloc(raidPtrs, num, sizeof(RF_Raid_t *), (RF_Raid_t **));
    326 	if (raidPtrs == NULL) {
    327 		panic("raidPtrs is NULL!!\n");
    328 	}
    329 
    330 	rc = rf_mutex_init(&rf_sparet_wait_mutex);
    331 	if (rc) {
    332 		RF_PANIC();
    333 	}
    334 
    335 	rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
    336 
    337 	for (i = 0; i < num; i++)
    338 		raidPtrs[i] = NULL;
    339 	rc = rf_BootRaidframe();
    340 	if (rc == 0)
    341 		printf("Kernelized RAIDframe activated\n");
    342 	else
    343 		panic("Serious error booting RAID!!\n");
    344 
    345 	/* put together some datastructures like the CCD device does.. This
    346 	 * lets us lock the device and what-not when it gets opened. */
    347 
    348 	raid_softc = (struct raid_softc *)
    349 		malloc(num * sizeof(struct raid_softc),
    350 		       M_RAIDFRAME, M_NOWAIT);
    351 	if (raid_softc == NULL) {
    352 		printf("WARNING: no memory for RAIDframe driver\n");
    353 		return;
    354 	}
    355 
    356 	bzero(raid_softc, num * sizeof(struct raid_softc));
    357 
    358 	raidrootdev = (struct device *)malloc(num * sizeof(struct device),
    359 					      M_RAIDFRAME, M_NOWAIT);
    360 	if (raidrootdev == NULL) {
    361 		panic("No memory for RAIDframe driver!!?!?!\n");
    362 	}
    363 
    364 	for (raidID = 0; raidID < num; raidID++) {
    365 		BUFQ_INIT(&raid_softc[raidID].buf_queue);
    366 
    367 		raidrootdev[raidID].dv_class  = DV_DISK;
    368 		raidrootdev[raidID].dv_cfdata = NULL;
    369 		raidrootdev[raidID].dv_unit   = raidID;
    370 		raidrootdev[raidID].dv_parent = NULL;
    371 		raidrootdev[raidID].dv_flags  = 0;
    372 		sprintf(raidrootdev[raidID].dv_xname,"raid%d",raidID);
    373 
    374 		RF_Calloc(raidPtrs[raidID], 1, sizeof(RF_Raid_t),
    375 			  (RF_Raid_t *));
    376 		if (raidPtrs[raidID] == NULL) {
    377 			printf("WARNING: raidPtrs[%d] is NULL\n", raidID);
    378 			numraid = raidID;
    379 			return;
    380 		}
    381 	}
    382 
    383 if (raidautoconfig) {
    384 	/* 1. locate all RAID components on the system */
    385 
    386 #if DEBUG
    387 	printf("Searching for raid components...\n");
    388 #endif
    389 	ac_list = rf_find_raid_components();
    390 
    391 	/* 2. sort them into their respective sets */
    392 
    393 	config_sets = rf_create_auto_sets(ac_list);
    394 
    395 	/* 3. evaluate each set and configure the valid ones
    396 	   This gets done in rf_buildroothack() */
    397 
    398 	/* schedule the creation of the thread to do the
    399 	   "/ on RAID" stuff */
    400 
    401 	kthread_create(rf_buildroothack,config_sets);
    402 
    403 #if 0
    404 	mountroothook_establish(rf_mountroot_hook, &raidrootdev[0]);
    405 #endif
    406 }
    407 
    408 }
    409 
    410 void
    411 rf_buildroothack(arg)
    412 	void *arg;
    413 {
    414 	RF_ConfigSet_t *config_sets = arg;
    415 	RF_ConfigSet_t *cset;
    416 	RF_ConfigSet_t *next_cset;
    417 	int retcode;
    418 	int raidID;
    419 	int rootID;
    420 	int num_root;
    421 
    422 	num_root = 0;
    423 	cset = config_sets;
    424 	while(cset != NULL ) {
    425 		next_cset = cset->next;
    426 		if (rf_have_enough_components(cset) &&
    427 		    cset->ac->clabel->autoconfigure==1) {
    428 			retcode = rf_auto_config_set(cset,&raidID);
    429 			if (!retcode) {
    430 				if (cset->rootable) {
    431 					rootID = raidID;
    432 					num_root++;
    433 				}
    434 			} else {
    435 				/* The autoconfig didn't work :( */
    436 #if DEBUG
    437 				printf("Autoconfig failed with code %d for raid%d\n", retcode, raidID);
    438 #endif
    439 				rf_release_all_vps(cset);
    440 			}
    441 		} else {
    442 			/* we're not autoconfiguring this set...
    443 			   release the associated resources */
    444 			rf_release_all_vps(cset);
    445 		}
    446 		/* cleanup */
    447 		rf_cleanup_config_set(cset);
    448 		cset = next_cset;
    449 	}
    450 	if (boothowto & RB_ASKNAME) {
    451 		/* We don't auto-config... */
    452 	} else {
    453 		/* They didn't ask, and we found something bootable... */
    454 
    455 		if (num_root == 1) {
    456 			booted_device = &raidrootdev[rootID];
    457 		} else if (num_root > 1) {
    458 			/* we can't guess.. require the user to answer... */
    459 			boothowto |= RB_ASKNAME;
    460 		}
    461 	}
    462 }
    463 
    464 
    465 int
    466 raidsize(dev)
    467 	dev_t   dev;
    468 {
    469 	struct raid_softc *rs;
    470 	struct disklabel *lp;
    471 	int     part, unit, omask, size;
    472 
    473 	unit = raidunit(dev);
    474 	if (unit >= numraid)
    475 		return (-1);
    476 	rs = &raid_softc[unit];
    477 
    478 	if ((rs->sc_flags & RAIDF_INITED) == 0)
    479 		return (-1);
    480 
    481 	part = DISKPART(dev);
    482 	omask = rs->sc_dkdev.dk_openmask & (1 << part);
    483 	lp = rs->sc_dkdev.dk_label;
    484 
    485 	if (omask == 0 && raidopen(dev, 0, S_IFBLK, curproc))
    486 		return (-1);
    487 
    488 	if (lp->d_partitions[part].p_fstype != FS_SWAP)
    489 		size = -1;
    490 	else
    491 		size = lp->d_partitions[part].p_size *
    492 		    (lp->d_secsize / DEV_BSIZE);
    493 
    494 	if (omask == 0 && raidclose(dev, 0, S_IFBLK, curproc))
    495 		return (-1);
    496 
    497 	return (size);
    498 
    499 }
    500 
    501 int
    502 raiddump(dev, blkno, va, size)
    503 	dev_t   dev;
    504 	daddr_t blkno;
    505 	caddr_t va;
    506 	size_t  size;
    507 {
    508 	/* Not implemented. */
    509 	return ENXIO;
    510 }
    511 /* ARGSUSED */
    512 int
    513 raidopen(dev, flags, fmt, p)
    514 	dev_t   dev;
    515 	int     flags, fmt;
    516 	struct proc *p;
    517 {
    518 	int     unit = raidunit(dev);
    519 	struct raid_softc *rs;
    520 	struct disklabel *lp;
    521 	int     part, pmask;
    522 	int     error = 0;
    523 
    524 	if (unit >= numraid)
    525 		return (ENXIO);
    526 	rs = &raid_softc[unit];
    527 
    528 	if ((error = raidlock(rs)) != 0)
    529 		return (error);
    530 	lp = rs->sc_dkdev.dk_label;
    531 
    532 	part = DISKPART(dev);
    533 	pmask = (1 << part);
    534 
    535 	db1_printf(("Opening raid device number: %d partition: %d\n",
    536 		unit, part));
    537 
    538 
    539 	if ((rs->sc_flags & RAIDF_INITED) &&
    540 	    (rs->sc_dkdev.dk_openmask == 0))
    541 		raidgetdisklabel(dev);
    542 
    543 	/* make sure that this partition exists */
    544 
    545 	if (part != RAW_PART) {
    546 		db1_printf(("Not a raw partition..\n"));
    547 		if (((rs->sc_flags & RAIDF_INITED) == 0) ||
    548 		    ((part >= lp->d_npartitions) ||
    549 			(lp->d_partitions[part].p_fstype == FS_UNUSED))) {
    550 			error = ENXIO;
    551 			raidunlock(rs);
    552 			db1_printf(("Bailing out...\n"));
    553 			return (error);
    554 		}
    555 	}
    556 	/* Prevent this unit from being unconfigured while open. */
    557 	switch (fmt) {
    558 	case S_IFCHR:
    559 		rs->sc_dkdev.dk_copenmask |= pmask;
    560 		break;
    561 
    562 	case S_IFBLK:
    563 		rs->sc_dkdev.dk_bopenmask |= pmask;
    564 		break;
    565 	}
    566 
    567 	if ((rs->sc_dkdev.dk_openmask == 0) &&
    568 	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
    569 		/* First one... mark things as dirty... Note that we *MUST*
    570 		 have done a configure before this.  I DO NOT WANT TO BE
    571 		 SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
    572 		 THAT THEY BELONG TOGETHER!!!!! */
    573 		/* XXX should check to see if we're only open for reading
    574 		   here... If so, we needn't do this, but then need some
    575 		   other way of keeping track of what's happened.. */
    576 
    577 		rf_markalldirty( raidPtrs[unit] );
    578 	}
    579 
    580 
    581 	rs->sc_dkdev.dk_openmask =
    582 	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
    583 
    584 	raidunlock(rs);
    585 
    586 	return (error);
    587 
    588 
    589 }
    590 /* ARGSUSED */
    591 int
    592 raidclose(dev, flags, fmt, p)
    593 	dev_t   dev;
    594 	int     flags, fmt;
    595 	struct proc *p;
    596 {
    597 	int     unit = raidunit(dev);
    598 	struct raid_softc *rs;
    599 	int     error = 0;
    600 	int     part;
    601 
    602 	if (unit >= numraid)
    603 		return (ENXIO);
    604 	rs = &raid_softc[unit];
    605 
    606 	if ((error = raidlock(rs)) != 0)
    607 		return (error);
    608 
    609 	part = DISKPART(dev);
    610 
    611 	/* ...that much closer to allowing unconfiguration... */
    612 	switch (fmt) {
    613 	case S_IFCHR:
    614 		rs->sc_dkdev.dk_copenmask &= ~(1 << part);
    615 		break;
    616 
    617 	case S_IFBLK:
    618 		rs->sc_dkdev.dk_bopenmask &= ~(1 << part);
    619 		break;
    620 	}
    621 	rs->sc_dkdev.dk_openmask =
    622 	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
    623 
    624 	if ((rs->sc_dkdev.dk_openmask == 0) &&
    625 	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
    626 		/* Last one... device is not unconfigured yet.
    627 		   Device shutdown has taken care of setting the
    628 		   clean bits if RAIDF_INITED is not set
    629 		   mark things as clean... */
    630 #if 0
    631 		printf("Last one on raid%d.  Updating status.\n",unit);
    632 #endif
    633 		rf_update_component_labels( raidPtrs[unit] );
    634 	}
    635 
    636 	raidunlock(rs);
    637 	return (0);
    638 
    639 }
    640 
    641 void
    642 raidstrategy(bp)
    643 	register struct buf *bp;
    644 {
    645 	register int s;
    646 
    647 	unsigned int raidID = raidunit(bp->b_dev);
    648 	RF_Raid_t *raidPtr;
    649 	struct raid_softc *rs = &raid_softc[raidID];
    650 	struct disklabel *lp;
    651 	int     wlabel;
    652 
    653 	if ((rs->sc_flags & RAIDF_INITED) ==0) {
    654 		bp->b_error = ENXIO;
    655 		bp->b_flags = B_ERROR;
    656 		bp->b_resid = bp->b_bcount;
    657 		biodone(bp);
    658 		return;
    659 	}
    660 	if (raidID >= numraid || !raidPtrs[raidID]) {
    661 		bp->b_error = ENODEV;
    662 		bp->b_flags |= B_ERROR;
    663 		bp->b_resid = bp->b_bcount;
    664 		biodone(bp);
    665 		return;
    666 	}
    667 	raidPtr = raidPtrs[raidID];
    668 	if (!raidPtr->valid) {
    669 		bp->b_error = ENODEV;
    670 		bp->b_flags |= B_ERROR;
    671 		bp->b_resid = bp->b_bcount;
    672 		biodone(bp);
    673 		return;
    674 	}
    675 	if (bp->b_bcount == 0) {
    676 		db1_printf(("b_bcount is zero..\n"));
    677 		biodone(bp);
    678 		return;
    679 	}
    680 	lp = rs->sc_dkdev.dk_label;
    681 
    682 	/*
    683 	 * Do bounds checking and adjust transfer.  If there's an
    684 	 * error, the bounds check will flag that for us.
    685 	 */
    686 
    687 	wlabel = rs->sc_flags & (RAIDF_WLABEL | RAIDF_LABELLING);
    688 	if (DISKPART(bp->b_dev) != RAW_PART)
    689 		if (bounds_check_with_label(bp, lp, wlabel) <= 0) {
    690 			db1_printf(("Bounds check failed!!:%d %d\n",
    691 				(int) bp->b_blkno, (int) wlabel));
    692 			biodone(bp);
    693 			return;
    694 		}
    695 	s = splbio();
    696 
    697 	bp->b_resid = 0;
    698 
    699 	/* stuff it onto our queue */
    700 	BUFQ_INSERT_TAIL(&rs->buf_queue, bp);
    701 
    702 	raidstart(raidPtrs[raidID]);
    703 
    704 	splx(s);
    705 }
    706 /* ARGSUSED */
    707 int
    708 raidread(dev, uio, flags)
    709 	dev_t   dev;
    710 	struct uio *uio;
    711 	int     flags;
    712 {
    713 	int     unit = raidunit(dev);
    714 	struct raid_softc *rs;
    715 	int     part;
    716 
    717 	if (unit >= numraid)
    718 		return (ENXIO);
    719 	rs = &raid_softc[unit];
    720 
    721 	if ((rs->sc_flags & RAIDF_INITED) == 0)
    722 		return (ENXIO);
    723 	part = DISKPART(dev);
    724 
    725 	db1_printf(("raidread: unit: %d partition: %d\n", unit, part));
    726 
    727 	return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));
    728 
    729 }
    730 /* ARGSUSED */
    731 int
    732 raidwrite(dev, uio, flags)
    733 	dev_t   dev;
    734 	struct uio *uio;
    735 	int     flags;
    736 {
    737 	int     unit = raidunit(dev);
    738 	struct raid_softc *rs;
    739 
    740 	if (unit >= numraid)
    741 		return (ENXIO);
    742 	rs = &raid_softc[unit];
    743 
    744 	if ((rs->sc_flags & RAIDF_INITED) == 0)
    745 		return (ENXIO);
    746 	db1_printf(("raidwrite\n"));
    747 	return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));
    748 
    749 }
    750 
    751 int
    752 raidioctl(dev, cmd, data, flag, p)
    753 	dev_t   dev;
    754 	u_long  cmd;
    755 	caddr_t data;
    756 	int     flag;
    757 	struct proc *p;
    758 {
    759 	int     unit = raidunit(dev);
    760 	int     error = 0;
    761 	int     part, pmask;
    762 	struct raid_softc *rs;
    763 	RF_Config_t *k_cfg, *u_cfg;
    764 	RF_Raid_t *raidPtr;
    765 	RF_RaidDisk_t *diskPtr;
    766 	RF_AccTotals_t *totals;
    767 	RF_DeviceConfig_t *d_cfg, **ucfgp;
    768 	u_char *specific_buf;
    769 	int retcode = 0;
    770 	int row;
    771 	int column;
    772 	struct rf_recon_req *rrcopy, *rr;
    773 	RF_ComponentLabel_t *clabel;
    774 	RF_ComponentLabel_t ci_label;
    775 	RF_ComponentLabel_t **clabel_ptr;
    776 	RF_SingleComponent_t *sparePtr,*componentPtr;
    777 	RF_SingleComponent_t hot_spare;
    778 	RF_SingleComponent_t component;
    779 	int i, j, d;
    780 
    781 	if (unit >= numraid)
    782 		return (ENXIO);
    783 	rs = &raid_softc[unit];
    784 	raidPtr = raidPtrs[unit];
    785 
    786 	db1_printf(("raidioctl: %d %d %d %d\n", (int) dev,
    787 		(int) DISKPART(dev), (int) unit, (int) cmd));
    788 
    789 	/* Must be open for writes for these commands... */
    790 	switch (cmd) {
    791 	case DIOCSDINFO:
    792 	case DIOCWDINFO:
    793 	case DIOCWLABEL:
    794 		if ((flag & FWRITE) == 0)
    795 			return (EBADF);
    796 	}
    797 
    798 	/* Must be initialized for these... */
    799 	switch (cmd) {
    800 	case DIOCGDINFO:
    801 	case DIOCSDINFO:
    802 	case DIOCWDINFO:
    803 	case DIOCGPART:
    804 	case DIOCWLABEL:
    805 	case DIOCGDEFLABEL:
    806 	case RAIDFRAME_SHUTDOWN:
    807 	case RAIDFRAME_REWRITEPARITY:
    808 	case RAIDFRAME_GET_INFO:
    809 	case RAIDFRAME_RESET_ACCTOTALS:
    810 	case RAIDFRAME_GET_ACCTOTALS:
    811 	case RAIDFRAME_KEEP_ACCTOTALS:
    812 	case RAIDFRAME_GET_SIZE:
    813 	case RAIDFRAME_FAIL_DISK:
    814 	case RAIDFRAME_COPYBACK:
    815 	case RAIDFRAME_CHECK_RECON_STATUS:
    816 	case RAIDFRAME_GET_COMPONENT_LABEL:
    817 	case RAIDFRAME_SET_COMPONENT_LABEL:
    818 	case RAIDFRAME_ADD_HOT_SPARE:
    819 	case RAIDFRAME_REMOVE_HOT_SPARE:
    820 	case RAIDFRAME_INIT_LABELS:
    821 	case RAIDFRAME_REBUILD_IN_PLACE:
    822 	case RAIDFRAME_CHECK_PARITY:
    823 	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
    824 	case RAIDFRAME_CHECK_COPYBACK_STATUS:
    825 	case RAIDFRAME_SET_AUTOCONFIG:
    826 	case RAIDFRAME_SET_ROOT:
    827 		if ((rs->sc_flags & RAIDF_INITED) == 0)
    828 			return (ENXIO);
    829 	}
    830 
    831 	switch (cmd) {
    832 
    833 		/* configure the system */
    834 	case RAIDFRAME_CONFIGURE:
    835 
    836 		if (raidPtr->valid) {
    837 			/* There is a valid RAID set running on this unit! */
    838 			printf("raid%d: Device already configured!\n",unit);
    839 		}
    840 
    841 		/* copy-in the configuration information */
    842 		/* data points to a pointer to the configuration structure */
    843 
    844 		u_cfg = *((RF_Config_t **) data);
    845 		RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *));
    846 		if (k_cfg == NULL) {
    847 			return (ENOMEM);
    848 		}
    849 		retcode = copyin((caddr_t) u_cfg, (caddr_t) k_cfg,
    850 		    sizeof(RF_Config_t));
    851 		if (retcode) {
    852 			RF_Free(k_cfg, sizeof(RF_Config_t));
    853 			db1_printf(("rf_ioctl: retcode=%d copyin.1\n",
    854 				retcode));
    855 			return (retcode);
    856 		}
    857 		/* allocate a buffer for the layout-specific data, and copy it
    858 		 * in */
    859 		if (k_cfg->layoutSpecificSize) {
    860 			if (k_cfg->layoutSpecificSize > 10000) {
    861 				/* sanity check */
    862 				RF_Free(k_cfg, sizeof(RF_Config_t));
    863 				return (EINVAL);
    864 			}
    865 			RF_Malloc(specific_buf, k_cfg->layoutSpecificSize,
    866 			    (u_char *));
    867 			if (specific_buf == NULL) {
    868 				RF_Free(k_cfg, sizeof(RF_Config_t));
    869 				return (ENOMEM);
    870 			}
    871 			retcode = copyin(k_cfg->layoutSpecific,
    872 			    (caddr_t) specific_buf,
    873 			    k_cfg->layoutSpecificSize);
    874 			if (retcode) {
    875 				RF_Free(k_cfg, sizeof(RF_Config_t));
    876 				RF_Free(specific_buf,
    877 					k_cfg->layoutSpecificSize);
    878 				db1_printf(("rf_ioctl: retcode=%d copyin.2\n",
    879 					retcode));
    880 				return (retcode);
    881 			}
    882 		} else
    883 			specific_buf = NULL;
    884 		k_cfg->layoutSpecific = specific_buf;
    885 
    886 		/* should do some kind of sanity check on the configuration.
    887 		 * Store the sum of all the bytes in the last byte? */
    888 
    889 		/* configure the system */
    890 
    891 		/*
    892 		 * Clear the entire RAID descriptor, just to make sure
    893 		 *  there is no stale data left in the case of a
    894 		 *  reconfiguration
    895 		 */
    896 		bzero((char *) raidPtr, sizeof(RF_Raid_t));
    897 		raidPtr->raidid = unit;
    898 
    899 		retcode = rf_Configure(raidPtr, k_cfg, NULL);
    900 
    901 		if (retcode == 0) {
    902 
    903 			/* allow this many simultaneous IO's to
    904 			   this RAID device */
    905 			raidPtr->openings = RAIDOUTSTANDING;
    906 
    907 			raidinit(raidPtr);
    908 			rf_markalldirty(raidPtr);
    909 		}
    910 		/* free the buffers.  No return code here. */
    911 		if (k_cfg->layoutSpecificSize) {
    912 			RF_Free(specific_buf, k_cfg->layoutSpecificSize);
    913 		}
    914 		RF_Free(k_cfg, sizeof(RF_Config_t));
    915 
    916 		return (retcode);
    917 
    918 		/* shutdown the system */
    919 	case RAIDFRAME_SHUTDOWN:
    920 
    921 		if ((error = raidlock(rs)) != 0)
    922 			return (error);
    923 
    924 		/*
    925 		 * If somebody has a partition mounted, we shouldn't
    926 		 * shutdown.
    927 		 */
    928 
    929 		part = DISKPART(dev);
    930 		pmask = (1 << part);
    931 		if ((rs->sc_dkdev.dk_openmask & ~pmask) ||
    932 		    ((rs->sc_dkdev.dk_bopenmask & pmask) &&
    933 			(rs->sc_dkdev.dk_copenmask & pmask))) {
    934 			raidunlock(rs);
    935 			return (EBUSY);
    936 		}
    937 
    938 		retcode = rf_Shutdown(raidPtr);
    939 
    940 		pool_destroy(&rs->sc_cbufpool);
    941 
    942 		/* It's no longer initialized... */
    943 		rs->sc_flags &= ~RAIDF_INITED;
    944 
    945 		/* Detach the disk. */
    946 		disk_detach(&rs->sc_dkdev);
    947 
    948 		raidunlock(rs);
    949 
    950 		return (retcode);
    951 	case RAIDFRAME_GET_COMPONENT_LABEL:
    952 		clabel_ptr = (RF_ComponentLabel_t **) data;
    953 		/* need to read the component label for the disk indicated
    954 		   by row,column in clabel */
    955 
     956 		/* For practice, let's get it directly from disk, rather
     957 		   than from the in-core copy */
    958 		RF_Malloc( clabel, sizeof( RF_ComponentLabel_t ),
    959 			   (RF_ComponentLabel_t *));
    960 		if (clabel == NULL)
    961 			return (ENOMEM);
    962 
    963 		bzero((char *) clabel, sizeof(RF_ComponentLabel_t));
    964 
    965 		retcode = copyin( *clabel_ptr, clabel,
    966 				  sizeof(RF_ComponentLabel_t));
    967 
    968 		if (retcode) {
    969 			RF_Free( clabel, sizeof(RF_ComponentLabel_t));
    970 			return(retcode);
    971 		}
    972 
    973 		row = clabel->row;
    974 		column = clabel->column;
    975 
    976 		if ((row < 0) || (row >= raidPtr->numRow) ||
    977 		    (column < 0) || (column >= raidPtr->numCol)) {
    978 			RF_Free( clabel, sizeof(RF_ComponentLabel_t));
    979 			return(EINVAL);
    980 		}
    981 
    982 		raidread_component_label(raidPtr->Disks[row][column].dev,
    983 				raidPtr->raid_cinfo[row][column].ci_vp,
    984 				clabel );
    985 
    986 		retcode = copyout((caddr_t) clabel,
    987 				  (caddr_t) *clabel_ptr,
    988 				  sizeof(RF_ComponentLabel_t));
    989 		RF_Free( clabel, sizeof(RF_ComponentLabel_t));
    990 		return (retcode);
    991 
    992 	case RAIDFRAME_SET_COMPONENT_LABEL:
    993 		clabel = (RF_ComponentLabel_t *) data;
    994 
    995 		/* XXX check the label for valid stuff... */
    996 		/* Note that some things *should not* get modified --
    997 		   the user should be re-initing the labels instead of
    998 		   trying to patch things.
    999 		   */
   1000 
   1001 		printf("Got component label:\n");
   1002 		printf("Version: %d\n",clabel->version);
   1003 		printf("Serial Number: %d\n",clabel->serial_number);
   1004 		printf("Mod counter: %d\n",clabel->mod_counter);
   1005 		printf("Row: %d\n", clabel->row);
   1006 		printf("Column: %d\n", clabel->column);
   1007 		printf("Num Rows: %d\n", clabel->num_rows);
   1008 		printf("Num Columns: %d\n", clabel->num_columns);
   1009 		printf("Clean: %d\n", clabel->clean);
   1010 		printf("Status: %d\n", clabel->status);
   1011 
   1012 		row = clabel->row;
   1013 		column = clabel->column;
   1014 
   1015 		if ((row < 0) || (row >= raidPtr->numRow) ||
   1016 		    (column < 0) || (column >= raidPtr->numCol)) {
   1017 			return(EINVAL);
   1018 		}
   1019 
   1020 		/* XXX this isn't allowed to do anything for now :-) */
   1021 
   1022 		/* XXX and before it is, we need to fill in the rest
   1023 		   of the fields!?!?!?! */
   1024 #if 0
   1025 		raidwrite_component_label(
   1026                             raidPtr->Disks[row][column].dev,
   1027 			    raidPtr->raid_cinfo[row][column].ci_vp,
   1028 			    clabel );
   1029 #endif
   1030 		return (0);
   1031 
   1032 	case RAIDFRAME_INIT_LABELS:
   1033 		clabel = (RF_ComponentLabel_t *) data;
   1034 		/*
   1035 		   we only want the serial number from
   1036 		   the above.  We get all the rest of the information
   1037 		   from the config that was used to create this RAID
   1038 		   set.
   1039 		   */
   1040 
   1041 		raidPtr->serial_number = clabel->serial_number;
   1042 
   1043 		raid_init_component_label(raidPtr, &ci_label);
   1044 		ci_label.serial_number = clabel->serial_number;
   1045 
   1046 		for(row=0;row<raidPtr->numRow;row++) {
   1047 			ci_label.row = row;
   1048 			for(column=0;column<raidPtr->numCol;column++) {
   1049 				diskPtr = &raidPtr->Disks[row][column];
   1050 				ci_label.partitionSize = diskPtr->partitionSize;
   1051 				ci_label.column = column;
   1052 				raidwrite_component_label(
   1053 				  raidPtr->Disks[row][column].dev,
   1054 				  raidPtr->raid_cinfo[row][column].ci_vp,
   1055 				  &ci_label );
   1056 			}
   1057 		}
   1058 
   1059 		return (retcode);
   1060 	case RAIDFRAME_SET_AUTOCONFIG:
   1061 		d = rf_set_autoconfig(raidPtr, *data);
   1062 		printf("New autoconfig value is: %d\n", d);
   1063 		*data = d;
   1064 		return (retcode);
   1065 
   1066 	case RAIDFRAME_SET_ROOT:
   1067 		d = rf_set_rootpartition(raidPtr, *data);
   1068 		printf("New rootpartition value is: %d\n", d);
   1069 		*data = d;
   1070 		return (retcode);
   1071 
   1072 		/* initialize all parity */
   1073 	case RAIDFRAME_REWRITEPARITY:
   1074 
   1075 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1076 			/* Parity for RAID 0 is trivially correct */
   1077 			raidPtr->parity_good = RF_RAID_CLEAN;
   1078 			return(0);
   1079 		}
   1080 
   1081 		if (raidPtr->parity_rewrite_in_progress == 1) {
   1082 			/* Re-write is already in progress! */
   1083 			return(EINVAL);
   1084 		}
   1085 
   1086 		retcode = RF_CREATE_THREAD(raidPtr->parity_rewrite_thread,
   1087 					   rf_RewriteParityThread,
   1088 					   raidPtr,"raid_parity");
   1089 		return (retcode);
   1090 
   1091 
   1092 	case RAIDFRAME_ADD_HOT_SPARE:
   1093 		sparePtr = (RF_SingleComponent_t *) data;
   1094 		memcpy( &hot_spare, sparePtr, sizeof(RF_SingleComponent_t));
   1095 		printf("Adding spare\n");
   1096 		retcode = rf_add_hot_spare(raidPtr, &hot_spare);
   1097 		return(retcode);
   1098 
   1099 	case RAIDFRAME_REMOVE_HOT_SPARE:
   1100 		return(retcode);
   1101 
   1102 	case RAIDFRAME_REBUILD_IN_PLACE:
   1103 
   1104 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1105 			/* Can't do this on a RAID 0!! */
   1106 			return(EINVAL);
   1107 		}
   1108 
   1109 		if (raidPtr->recon_in_progress == 1) {
   1110 			/* a reconstruct is already in progress! */
   1111 			return(EINVAL);
   1112 		}
   1113 
   1114 		componentPtr = (RF_SingleComponent_t *) data;
   1115 		memcpy( &component, componentPtr,
   1116 			sizeof(RF_SingleComponent_t));
   1117 		row = component.row;
   1118 		column = component.column;
   1119 		printf("Rebuild: %d %d\n",row, column);
   1120 		if ((row < 0) || (row >= raidPtr->numRow) ||
   1121 		    (column < 0) || (column >= raidPtr->numCol)) {
   1122 			return(EINVAL);
   1123 		}
   1124 
   1125 		RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
   1126 		if (rrcopy == NULL)
   1127 			return(ENOMEM);
   1128 
   1129 		rrcopy->raidPtr = (void *) raidPtr;
   1130 		rrcopy->row = row;
   1131 		rrcopy->col = column;
   1132 
   1133 		retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
   1134 					   rf_ReconstructInPlaceThread,
   1135 					   rrcopy,"raid_reconip");
   1136 		return(retcode);
   1137 
   1138 	case RAIDFRAME_GET_INFO:
   1139 		if (!raidPtr->valid)
   1140 			return (ENODEV);
   1141 		ucfgp = (RF_DeviceConfig_t **) data;
   1142 		RF_Malloc(d_cfg, sizeof(RF_DeviceConfig_t),
   1143 			  (RF_DeviceConfig_t *));
   1144 		if (d_cfg == NULL)
   1145 			return (ENOMEM);
   1146 		bzero((char *) d_cfg, sizeof(RF_DeviceConfig_t));
   1147 		d_cfg->rows = raidPtr->numRow;
   1148 		d_cfg->cols = raidPtr->numCol;
   1149 		d_cfg->ndevs = raidPtr->numRow * raidPtr->numCol;
   1150 		if (d_cfg->ndevs >= RF_MAX_DISKS) {
   1151 			RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
   1152 			return (ENOMEM);
   1153 		}
   1154 		d_cfg->nspares = raidPtr->numSpare;
   1155 		if (d_cfg->nspares >= RF_MAX_DISKS) {
   1156 			RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
   1157 			return (ENOMEM);
   1158 		}
   1159 		d_cfg->maxqdepth = raidPtr->maxQueueDepth;
   1160 		d = 0;
   1161 		for (i = 0; i < d_cfg->rows; i++) {
   1162 			for (j = 0; j < d_cfg->cols; j++) {
   1163 				d_cfg->devs[d] = raidPtr->Disks[i][j];
   1164 				d++;
   1165 			}
   1166 		}
   1167 		for (j = d_cfg->cols, i = 0; i < d_cfg->nspares; i++, j++) {
   1168 			d_cfg->spares[i] = raidPtr->Disks[0][j];
   1169 		}
   1170 		retcode = copyout((caddr_t) d_cfg, (caddr_t) * ucfgp,
   1171 				  sizeof(RF_DeviceConfig_t));
   1172 		RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
   1173 
   1174 		return (retcode);
   1175 
   1176 	case RAIDFRAME_CHECK_PARITY:
   1177 		*(int *) data = raidPtr->parity_good;
   1178 		return (0);
   1179 
   1180 	case RAIDFRAME_RESET_ACCTOTALS:
   1181 		bzero(&raidPtr->acc_totals, sizeof(raidPtr->acc_totals));
   1182 		return (0);
   1183 
   1184 	case RAIDFRAME_GET_ACCTOTALS:
   1185 		totals = (RF_AccTotals_t *) data;
   1186 		*totals = raidPtr->acc_totals;
   1187 		return (0);
   1188 
   1189 	case RAIDFRAME_KEEP_ACCTOTALS:
   1190 		raidPtr->keep_acc_totals = *(int *)data;
   1191 		return (0);
   1192 
   1193 	case RAIDFRAME_GET_SIZE:
   1194 		*(int *) data = raidPtr->totalSectors;
   1195 		return (0);
   1196 
   1197 		/* fail a disk & optionally start reconstruction */
   1198 	case RAIDFRAME_FAIL_DISK:
   1199 
   1200 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1201 			/* Can't do this on a RAID 0!! */
   1202 			return(EINVAL);
   1203 		}
   1204 
   1205 		rr = (struct rf_recon_req *) data;
   1206 
   1207 		if (rr->row < 0 || rr->row >= raidPtr->numRow
   1208 		    || rr->col < 0 || rr->col >= raidPtr->numCol)
   1209 			return (EINVAL);
   1210 
   1211 		printf("raid%d: Failing the disk: row: %d col: %d\n",
   1212 		       unit, rr->row, rr->col);
   1213 
   1214 		/* make a copy of the recon request so that we don't rely on
   1215 		 * the user's buffer */
   1216 		RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
   1217 		if (rrcopy == NULL)
   1218 			return(ENOMEM);
   1219 		bcopy(rr, rrcopy, sizeof(*rr));
   1220 		rrcopy->raidPtr = (void *) raidPtr;
   1221 
   1222 		retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
   1223 					   rf_ReconThread,
   1224 					   rrcopy,"raid_recon");
   1225 		return (0);
   1226 
   1227 		/* invoke a copyback operation after recon on whatever disk
   1228 		 * needs it, if any */
   1229 	case RAIDFRAME_COPYBACK:
   1230 
   1231 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1232 			/* This makes no sense on a RAID 0!! */
   1233 			return(EINVAL);
   1234 		}
   1235 
   1236 		if (raidPtr->copyback_in_progress == 1) {
   1237 			/* Copyback is already in progress! */
   1238 			return(EINVAL);
   1239 		}
   1240 
   1241 		retcode = RF_CREATE_THREAD(raidPtr->copyback_thread,
   1242 					   rf_CopybackThread,
   1243 					   raidPtr,"raid_copyback");
   1244 		return (retcode);
   1245 
   1246 		/* return the percentage completion of reconstruction */
   1247 	case RAIDFRAME_CHECK_RECON_STATUS:
   1248 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1249 			/* This makes no sense on a RAID 0 */
   1250 			return(EINVAL);
   1251 		}
   1252 		row = 0; /* XXX we only consider a single row... */
   1253 		if (raidPtr->status[row] != rf_rs_reconstructing)
   1254 			*(int *) data = 100;
   1255 		else
   1256 			*(int *) data = raidPtr->reconControl[row]->percentComplete;
   1257 		return (0);
   1258 
   1259 	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
   1260 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1261 			/* This makes no sense on a RAID 0 */
   1262 			return(EINVAL);
   1263 		}
   1264 		if (raidPtr->parity_rewrite_in_progress == 1) {
   1265 			*(int *) data = 100 * raidPtr->parity_rewrite_stripes_done / raidPtr->Layout.numStripe;
   1266 		} else {
   1267 			*(int *) data = 100;
   1268 		}
   1269 		return (0);
   1270 
   1271 	case RAIDFRAME_CHECK_COPYBACK_STATUS:
   1272 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1273 			/* This makes no sense on a RAID 0 */
   1274 			return(EINVAL);
   1275 		}
   1276 		if (raidPtr->copyback_in_progress == 1) {
   1277 			*(int *) data = 100 * raidPtr->copyback_stripes_done /
   1278 				raidPtr->Layout.numStripe;
   1279 		} else {
   1280 			*(int *) data = 100;
   1281 		}
   1282 		return (0);
   1283 
   1284 
   1285 		/* the sparetable daemon calls this to wait for the kernel to
   1286 		 * need a spare table. this ioctl does not return until a
   1287 		 * spare table is needed. XXX -- calling mpsleep here in the
   1288 		 * ioctl code is almost certainly wrong and evil. -- XXX XXX
   1289 		 * -- I should either compute the spare table in the kernel,
   1290 		 * or have a different -- XXX XXX -- interface (a different
   1291 		 * character device) for delivering the table     -- XXX */
   1292 #if 0
   1293 	case RAIDFRAME_SPARET_WAIT:
   1294 		RF_LOCK_MUTEX(rf_sparet_wait_mutex);
   1295 		while (!rf_sparet_wait_queue)
   1296 			mpsleep(&rf_sparet_wait_queue, (PZERO + 1) | PCATCH, "sparet wait", 0, (void *) simple_lock_addr(rf_sparet_wait_mutex), MS_LOCK_SIMPLE);
   1297 		waitreq = rf_sparet_wait_queue;
   1298 		rf_sparet_wait_queue = rf_sparet_wait_queue->next;
   1299 		RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
   1300 
   1301 		/* structure assignment */
   1302 		*((RF_SparetWait_t *) data) = *waitreq;
   1303 
   1304 		RF_Free(waitreq, sizeof(*waitreq));
   1305 		return (0);
   1306 
    1307 		/* wakes up a process waiting on SPARET_WAIT and puts an error
    1308 		 * code in it that will cause the daemon to exit */
   1309 	case RAIDFRAME_ABORT_SPARET_WAIT:
   1310 		RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
   1311 		waitreq->fcol = -1;
   1312 		RF_LOCK_MUTEX(rf_sparet_wait_mutex);
   1313 		waitreq->next = rf_sparet_wait_queue;
   1314 		rf_sparet_wait_queue = waitreq;
   1315 		RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
   1316 		wakeup(&rf_sparet_wait_queue);
   1317 		return (0);
   1318 
   1319 		/* used by the spare table daemon to deliver a spare table
   1320 		 * into the kernel */
   1321 	case RAIDFRAME_SEND_SPARET:
   1322 
   1323 		/* install the spare table */
   1324 		retcode = rf_SetSpareTable(raidPtr, *(void **) data);
   1325 
   1326 		/* respond to the requestor.  the return status of the spare
   1327 		 * table installation is passed in the "fcol" field */
   1328 		RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
   1329 		waitreq->fcol = retcode;
   1330 		RF_LOCK_MUTEX(rf_sparet_wait_mutex);
   1331 		waitreq->next = rf_sparet_resp_queue;
   1332 		rf_sparet_resp_queue = waitreq;
   1333 		wakeup(&rf_sparet_resp_queue);
   1334 		RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
   1335 
   1336 		return (retcode);
   1337 #endif
   1338 
   1339 	default:
   1340 		break; /* fall through to the os-specific code below */
   1341 
   1342 	}
   1343 
   1344 	if (!raidPtr->valid)
   1345 		return (EINVAL);
   1346 
   1347 	/*
   1348 	 * Add support for "regular" device ioctls here.
   1349 	 */
   1350 
   1351 	switch (cmd) {
   1352 	case DIOCGDINFO:
   1353 		*(struct disklabel *) data = *(rs->sc_dkdev.dk_label);
   1354 		break;
   1355 
   1356 	case DIOCGPART:
   1357 		((struct partinfo *) data)->disklab = rs->sc_dkdev.dk_label;
   1358 		((struct partinfo *) data)->part =
   1359 		    &rs->sc_dkdev.dk_label->d_partitions[DISKPART(dev)];
   1360 		break;
   1361 
   1362 	case DIOCWDINFO:
   1363 	case DIOCSDINFO:
   1364 		if ((error = raidlock(rs)) != 0)
   1365 			return (error);
   1366 
   1367 		rs->sc_flags |= RAIDF_LABELLING;
   1368 
   1369 		error = setdisklabel(rs->sc_dkdev.dk_label,
   1370 		    (struct disklabel *) data, 0, rs->sc_dkdev.dk_cpulabel);
   1371 		if (error == 0) {
   1372 			if (cmd == DIOCWDINFO)
   1373 				error = writedisklabel(RAIDLABELDEV(dev),
   1374 				    raidstrategy, rs->sc_dkdev.dk_label,
   1375 				    rs->sc_dkdev.dk_cpulabel);
   1376 		}
   1377 		rs->sc_flags &= ~RAIDF_LABELLING;
   1378 
   1379 		raidunlock(rs);
   1380 
   1381 		if (error)
   1382 			return (error);
   1383 		break;
   1384 
   1385 	case DIOCWLABEL:
   1386 		if (*(int *) data != 0)
   1387 			rs->sc_flags |= RAIDF_WLABEL;
   1388 		else
   1389 			rs->sc_flags &= ~RAIDF_WLABEL;
   1390 		break;
   1391 
   1392 	case DIOCGDEFLABEL:
   1393 		raidgetdefaultlabel(raidPtr, rs,
   1394 		    (struct disklabel *) data);
   1395 		break;
   1396 
   1397 	default:
   1398 		retcode = ENOTTY;
   1399 	}
   1400 	return (retcode);
   1401 
   1402 }
   1403 
   1404 
   1405 /* raidinit -- complete the rest of the initialization for the
   1406    RAIDframe device.  */
   1407 
   1408 
   1409 static void
   1410 raidinit(raidPtr)
   1411 	RF_Raid_t *raidPtr;
   1412 {
   1413 	struct raid_softc *rs;
   1414 	int     unit;
   1415 
   1416 	unit = raidPtr->raidid;
   1417 
   1418 	rs = &raid_softc[unit];
   1419 	pool_init(&rs->sc_cbufpool, sizeof(struct raidbuf), 0,
   1420 		  0, 0, "raidpl", 0, NULL, NULL, M_RAIDFRAME);
   1421 
   1422 
   1423 	/* XXX should check return code first... */
   1424 	rs->sc_flags |= RAIDF_INITED;
   1425 
   1426 	sprintf(rs->sc_xname, "raid%d", unit);	/* XXX doesn't check bounds. */
   1427 
   1428 	rs->sc_dkdev.dk_name = rs->sc_xname;
   1429 
   1430 	/* disk_attach actually creates space for the CPU disklabel, among
   1431 	 * other things, so it's critical to call this *BEFORE* we try putzing
   1432 	 * with disklabels. */
   1433 
   1434 	disk_attach(&rs->sc_dkdev);
   1435 
   1436 	/* XXX There may be a weird interaction here between this, and
   1437 	 * protectedSectors, as used in RAIDframe.  */
   1438 
   1439 	rs->sc_size = raidPtr->totalSectors;
   1440 
   1441 }
   1442 
   1443 /* wake up the daemon & tell it to get us a spare table
   1444  * XXX
   1445  * the entries in the queues should be tagged with the raidPtr
   1446  * so that in the extremely rare case that two recons happen at once,
   1447  * we know for which device were requesting a spare table
   1448  * XXX
   1449  *
   1450  * XXX This code is not currently used. GO
   1451  */
   1452 int
   1453 rf_GetSpareTableFromDaemon(req)
   1454 	RF_SparetWait_t *req;
   1455 {
   1456 	int     retcode;
   1457 
   1458 	RF_LOCK_MUTEX(rf_sparet_wait_mutex);
   1459 	req->next = rf_sparet_wait_queue;
   1460 	rf_sparet_wait_queue = req;
   1461 	wakeup(&rf_sparet_wait_queue);
   1462 
   1463 	/* mpsleep unlocks the mutex */
   1464 	while (!rf_sparet_resp_queue) {
   1465 		tsleep(&rf_sparet_resp_queue, PRIBIO,
   1466 		    "raidframe getsparetable", 0);
   1467 	}
   1468 	req = rf_sparet_resp_queue;
   1469 	rf_sparet_resp_queue = req->next;
   1470 	RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
   1471 
   1472 	retcode = req->fcol;
   1473 	RF_Free(req, sizeof(*req));	/* this is not the same req as we
   1474 					 * alloc'd */
   1475 	return (retcode);
   1476 }
   1477 
    1478 /* raidstart -- a wrapper around rf_DoAccess that extracts appropriate
    1479  * info from each queued bp & passes it down.
    1480  * any calls originating in the kernel must use non-blocking I/O
    1481  * do some extra sanity checking to return "appropriate" error values for
    1482  * certain conditions (to make some standard utilities work)
    1483  *
    1484  * Formerly known as: rf_DoAccessKernel
          *
          * Buffers are pulled off rs->buf_queue for as long as raidPtr->openings
          * is positive; the routine returns as soon as the queue is empty or the
          * openings are used up.  Each buffer's block number is made absolute by
          * adding the partition offset (skipped for RAW_PART).  A request whose
          * end would lie past raidPtr->totalSectors, or whose end computation
          * wrapped, is completed with ENOSPC.  A request whose byte count has
          * bits set in raidPtr->sectorMask is completed with EINVAL.  Everything
          * else is handed to rf_DoAccess() after taking one opening.
    1485  */
    1486 void
    1487 raidstart(raidPtr)
    1488 	RF_Raid_t *raidPtr;
    1489 {
    1490 	RF_SectorCount_t num_blocks, pb, sum;
    1491 	RF_RaidAddr_t raid_addr;
    1492 	int     retcode;
    1493 	struct partition *pp;
    1494 	daddr_t blocknum;
    1495 	int     unit;
    1496 	struct raid_softc *rs;
    1497 	int     do_async;
    1498 	struct buf *bp;
    1499 
    1500 	unit = raidPtr->raidid;
    1501 	rs = &raid_softc[unit];
    1502 
    1503 	/* quick check to see if anything has died recently */
         	/* NOTE(review): rf_update_component_labels() runs with
         	 * raidPtr->mutex held, and only one pending failure is consumed
         	 * per call -- confirm both are intended. */
    1504 	RF_LOCK_MUTEX(raidPtr->mutex);
    1505 	if (raidPtr->numNewFailures > 0) {
    1506 		rf_update_component_labels(raidPtr);
    1507 		raidPtr->numNewFailures--;
    1508 	}
    1509 	RF_UNLOCK_MUTEX(raidPtr->mutex);
    1510 
    1511 	/* Check to see if we're at the limit... */
         	/* The loop condition is always evaluated with the mutex held; the
         	 * mutex is dropped again at the top of every iteration. */
    1512 	RF_LOCK_MUTEX(raidPtr->mutex);
    1513 	while (raidPtr->openings > 0) {
    1514 		RF_UNLOCK_MUTEX(raidPtr->mutex);
    1515 
    1516 		/* get the next item, if any, from the queue */
    1517 		if ((bp = BUFQ_FIRST(&rs->buf_queue)) == NULL) {
    1518 			/* nothing more to do */
    1519 			return;
    1520 		}
    1521 		BUFQ_REMOVE(&rs->buf_queue, bp);
    1522 
    1523 		/* Ok, for the bp we have here, bp->b_blkno is relative to the
    1524 		 * partition.. Need to make it absolute to the underlying
    1525 		 * device.. */
    1526 
    1527 		blocknum = bp->b_blkno;
    1528 		if (DISKPART(bp->b_dev) != RAW_PART) {
    1529 			pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)];
    1530 			blocknum += pp->p_offset;
    1531 		}
    1532 
    1533 		db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno,
    1534 			    (int) blocknum));
    1535 
    1536 		db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
    1537 		db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));
    1538 
    1539 		/* *THIS* is where we adjust what block we're going to...
    1540 		 * but DO NOT TOUCH bp->b_blkno!!! */
    1541 		raid_addr = blocknum;
    1542 
         		/* num_blocks is the count of whole sectors; pb is 1 when
         		 * b_bcount has bits set in sectorMask, else 0; sum is one
         		 * past the last sector the request would touch. */
    1543 		num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
    1544 		pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
    1545 		sum = raid_addr + num_blocks + pb;
         		/* the "1 ||" makes this branch unconditional */
    1546 		if (1 || rf_debugKernelAccess) {
    1547 			db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
    1548 				    (int) raid_addr, (int) sum, (int) num_blocks,
    1549 				    (int) pb, (int) bp->b_resid));
    1550 		}
         		/* reject requests that end past the array, or whose sum is
         		 * smaller than one of its terms (arithmetic wrapped) */
    1551 		if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
    1552 		    || (sum < num_blocks) || (sum < pb)) {
    1553 			bp->b_error = ENOSPC;
    1554 			bp->b_flags |= B_ERROR;
    1555 			bp->b_resid = bp->b_bcount;
    1556 			biodone(bp);
         			/* re-take the mutex before re-testing the loop condition */
    1557 			RF_LOCK_MUTEX(raidPtr->mutex);
    1558 			continue;
    1559 		}
    1560 		/*
    1561 		 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
    1562 		 */
    1563 
    1564 		if (bp->b_bcount & raidPtr->sectorMask) {
    1565 			bp->b_error = EINVAL;
    1566 			bp->b_flags |= B_ERROR;
    1567 			bp->b_resid = bp->b_bcount;
    1568 			biodone(bp);
    1569 			RF_LOCK_MUTEX(raidPtr->mutex);
    1570 			continue;
    1571 
    1572 		}
    1573 		db1_printf(("Calling DoAccess..\n"));
    1574 
    1575 
         		/* take one opening for this request; nothing in this
         		 * routine gives it back */
    1576 		RF_LOCK_MUTEX(raidPtr->mutex);
    1577 		raidPtr->openings--;
    1578 		RF_UNLOCK_MUTEX(raidPtr->mutex);
    1579 
    1580 		/*
    1581 		 * Everything is async.
    1582 		 */
    1583 		do_async = 1;
    1584 
    1585 		/* don't ever condition on bp->b_flags & B_WRITE.
    1586 		 * always condition on B_READ instead */
    1587 
    1588 		/* XXX we're still at splbio() here... do we *really*
    1589 		   need to be? */
    1590 
    1591 
         		/* NOTE(review): retcode is stored but never examined, so a
         		 * failing rf_DoAccess() leaves bp uncompleted by this routine
         		 * and the opening taken above stays taken -- confirm whether
         		 * rf_DoAccess() handles that itself. */
    1592 		retcode = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
    1593 				      RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
    1594 				      do_async, raid_addr, num_blocks,
    1595 				      bp->b_un.b_addr, bp, NULL, NULL,
    1596 				      RF_DAG_NONBLOCKING_IO, NULL, NULL, NULL);
    1597 
    1598 
    1599 		RF_LOCK_MUTEX(raidPtr->mutex);
    1600 	}
    1601 	RF_UNLOCK_MUTEX(raidPtr->mutex);
    1602 }
   1603 
   1604 
   1605 
   1606 
    1607 /* rf_DispatchKernelIO -- invoke an I/O from kernel mode.  Disk queue
          * should be locked upon entry.
          *
          * queue is the disk queue the request belongs to and req is the
          * request to issue.  A struct raidbuf is obtained with RAIDGETBUF() and
          * remembers both the original buf (req->bp) and req so that
          * KernelWakeupFunc() can find them again.  Reads and writes are set
          * up with InitBP() and started with VOP_STRATEGY(); a NOP request
          * calls KernelWakeupFunc() directly.  Any other request type panics.
          * The function returns 0 on every path that returns.
          */
    1608 
    1609 int
    1610 rf_DispatchKernelIO(queue, req)
    1611 	RF_DiskQueue_t *queue;
    1612 	RF_DiskQueueData_t *req;
    1613 {
    1614 	int     op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
    1615 	struct buf *bp;
    1616 	struct raidbuf *raidbp = NULL;
    1617 	struct raid_softc *rs;
    1618 	int     unit;
    1619 	int s;
    1620 
         	/* s is only a placeholder while the splbio()/splx() pair below
         	 * stays commented out */
    1621 	s=0;
    1622 	/* s = splbio();*/ /* want to test this */
    1623 	/* XXX along with the vnode, we also need the softc associated with
    1624 	 * this device.. */
    1625 
    1626 	req->queue = queue;
    1627 
    1628 	unit = queue->raidPtr->raidid;
    1629 
    1630 	db1_printf(("DispatchKernelIO unit: %d\n", unit));
    1631 
    1632 	if (unit >= numraid) {
    1633 		printf("Invalid unit number: %d %d\n", unit, numraid);
    1634 		panic("Invalid Unit number in rf_DispatchKernelIO\n");
    1635 	}
    1636 	rs = &raid_softc[unit];
    1637 
    1638 	/* XXX is this the right place? */
         	/* note: this runs for every request type, NOP included */
    1639 	disk_busy(&rs->sc_dkdev);
    1640 
    1641 	bp = req->bp;
    1642 #if 1
    1643 	/* XXX when there is a physical disk failure, someone is passing us a
    1644 	 * buffer that contains old stuff!!  Attempt to deal with this problem
    1645 	 * without taking a performance hit... (not sure where the real bug
    1646 	 * is.  It's buried in RAIDframe somewhere) :-(  GO ) */
    1647 
    1648 	if (bp->b_flags & B_ERROR) {
    1649 		bp->b_flags &= ~B_ERROR;
    1650 	}
    1651 	if (bp->b_error != 0) {
    1652 		bp->b_error = 0;
    1653 	}
    1654 #endif
         	/* NOTE(review): the result is dereferenced right away without a
         	 * NULL check -- confirm RAIDGETBUF() cannot fail here. */
    1655 	raidbp = RAIDGETBUF(rs);
    1656 
    1657 	raidbp->rf_flags = 0;	/* XXX not really used anywhere... */
    1658 
    1659 	/*
    1660 	 * context for raidiodone
    1661 	 */
    1662 	raidbp->rf_obp = bp;
    1663 	raidbp->req = req;
    1664 
    1665 	LIST_INIT(&raidbp->rf_buf.b_dep);
    1666 
    1667 	switch (req->type) {
    1668 	case RF_IO_TYPE_NOP:	/* used primarily to unlock a locked queue */
    1669 		/* XXX need to do something extra here.. */
    1670 		/* I'm leaving this in, as I've never actually seen it used,
    1671 		 * and I'd like folks to report it... GO */
    1672 		printf(("WAKEUP CALLED\n"));
    1673 		queue->numOutstanding++;
    1674 
    1675 		/* XXX need to glue the original buffer into this??  */
    1676 
         		/* NOTE(review): rf_buf has not been through InitBP() on this
         		 * path, yet KernelWakeupFunc() reads its b_flags, b_resid and
         		 * b_bcount. */
    1677 		KernelWakeupFunc(&raidbp->rf_buf);
    1678 		break;
    1679 
    1680 	case RF_IO_TYPE_READ:
    1681 	case RF_IO_TYPE_WRITE:
    1682 
    1683 		if (req->tracerec) {
    1684 			RF_ETIMER_START(req->tracerec->timer);
    1685 		}
         		/* build the component I/O in raidbp->rf_buf; the original
         		 * buf's flags are OR-ed with the read/write bit, and
         		 * KernelWakeupFunc is passed as the completion callback */
    1686 		InitBP(&raidbp->rf_buf, queue->rf_cinfo->ci_vp,
    1687 		    op | bp->b_flags, queue->rf_cinfo->ci_dev,
    1688 		    req->sectorOffset, req->numSector,
    1689 		    req->buf, KernelWakeupFunc, (void *) req,
    1690 		    queue->raidPtr->logBytesPerSector, req->b_proc);
    1691 
         		/* note: this prints the original buf's b_blkno, not the one
         		 * just set up in raidbp->rf_buf */
    1692 		if (rf_debugKernelAccess) {
    1693 			db1_printf(("dispatch: bp->b_blkno = %ld\n",
    1694 				(long) bp->b_blkno));
    1695 		}
    1696 		queue->numOutstanding++;
    1697 		queue->last_deq_sector = req->sectorOffset;
    1698 		/* acc wouldn't have been let in if there were any pending
    1699 		 * reqs at any other priority */
    1700 		queue->curPriority = req->priority;
    1701 
    1702 		db1_printf(("Going for %c to unit %d row %d col %d\n",
    1703 			req->type, unit, queue->row, queue->col));
    1704 		db1_printf(("sector %d count %d (%d bytes) %d\n",
    1705 			(int) req->sectorOffset, (int) req->numSector,
    1706 			(int) (req->numSector <<
    1707 			    queue->raidPtr->logBytesPerSector),
    1708 			(int) queue->raidPtr->logBytesPerSector));
         		/* writes bump the vnode's v_numoutput before being issued */
    1709 		if ((raidbp->rf_buf.b_flags & B_READ) == 0) {
    1710 			raidbp->rf_buf.b_vp->v_numoutput++;
    1711 		}
    1712 		VOP_STRATEGY(&raidbp->rf_buf);
    1713 
    1714 		break;
    1715 
    1716 	default:
    1717 		panic("bad req->type in rf_DispatchKernelIO");
    1718 	}
    1719 	db1_printf(("Exiting from DispatchKernelIO\n"));
    1720 	/* splx(s); */ /* want to test this */
    1721 	return (0);
    1722 }
    1723 /* this is the callback function associated with a I/O invoked from
    1724    kernel code.
         
            vbp is the rf_buf embedded in a struct raidbuf that
            rf_DispatchKernelIO() set up.  Error state, b_resid and b_bcount are
            copied to the original buf (rf_obp), trace timing is accumulated when
            a trace record is attached, the component is marked failed on the
            first error seen against it, the raidbuf is handed back with
            RAIDPUTBUF(), and finally rf_DiskIOComplete() and the request's
            CompleteFunc are called with 1 (error) or 0.  The whole body runs
            between splbio() and splx().
    1725  */
    1726 static void
    1727 KernelWakeupFunc(vbp)
    1728 	struct buf *vbp;
    1729 {
    1730 	RF_DiskQueueData_t *req = NULL;
    1731 	RF_DiskQueue_t *queue;
         	/* NOTE(review): this cast presumes rf_buf is the first member of
         	 * struct raidbuf -- confirm against the struct definition. */
    1732 	struct raidbuf *raidbp = (struct raidbuf *) vbp;
    1733 	struct buf *bp;
    1734 	struct raid_softc *rs;
    1735 	int     unit;
    1736 	register int s;
    1737 
    1738 	s = splbio();
    1739 	db1_printf(("recovering the request queue:\n"));
    1740 	req = raidbp->req;
    1741 
    1742 	bp = raidbp->rf_obp;
    1743 
    1744 	queue = (RF_DiskQueue_t *) req->queue;
    1745 
         	/* propagate a component error to the original buf; EIO stands in
         	 * when B_ERROR is set but b_error is zero */
    1746 	if (raidbp->rf_buf.b_flags & B_ERROR) {
    1747 		bp->b_flags |= B_ERROR;
    1748 		bp->b_error = raidbp->rf_buf.b_error ?
    1749 		    raidbp->rf_buf.b_error : EIO;
    1750 	}
    1751 
    1752 	/* XXX methinks this could be wrong... */
    1753 #if 1
    1754 	bp->b_resid = raidbp->rf_buf.b_resid;
    1755 #endif
    1756 
         	/* charge the elapsed timer value to both diskwait_us and
         	 * phys_io_us, under rf_tracing_mutex */
    1757 	if (req->tracerec) {
    1758 		RF_ETIMER_STOP(req->tracerec->timer);
    1759 		RF_ETIMER_EVAL(req->tracerec->timer);
    1760 		RF_LOCK_MUTEX(rf_tracing_mutex);
    1761 		req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
    1762 		req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
    1763 		req->tracerec->num_phys_ios++;
    1764 		RF_UNLOCK_MUTEX(rf_tracing_mutex);
    1765 	}
    1766 	bp->b_bcount = raidbp->rf_buf.b_bcount;	/* XXXX ?? */
    1767 
    1768 	unit = queue->raidPtr->raidid;	/* *Much* simpler :-> */
    1769 
    1770 
    1771 	/* XXX Ok, let's get aggressive... If B_ERROR is set, let's go
    1772 	 * ballistic, and mark the component as hosed... */
    1773 
    1774 	if (bp->b_flags & B_ERROR) {
    1775 		/* Mark the disk as dead */
    1776 		/* but only mark it once... */
    1777 		if (queue->raidPtr->Disks[queue->row][queue->col].status ==
    1778 		    rf_ds_optimal) {
    1779 			printf("raid%d: IO Error.  Marking %s as failed.\n",
    1780 			    unit, queue->raidPtr->Disks[queue->row][queue->col].devname);
    1781 			queue->raidPtr->Disks[queue->row][queue->col].status =
    1782 			    rf_ds_failed;
    1783 			queue->raidPtr->status[queue->row] = rf_rs_degraded;
    1784 			queue->raidPtr->numFailures++;
         			/* raidstart() looks at numNewFailures to decide
         			 * whether to update the component labels */
    1785 			queue->raidPtr->numNewFailures++;
    1786 			/* XXX here we should bump the version number for each component, and write that data out */
    1787 		} else {	/* Disk is already dead... */
    1788 			/* printf("Disk already marked as dead!\n"); */
    1789 		}
    1790 
    1791 	}
    1792 
         	/* raidbp is not touched after this point; req, bp and queue were
         	 * all fetched from it above */
    1793 	rs = &raid_softc[unit];
    1794 	RAIDPUTBUF(rs, raidbp);
    1795 
    1796 
         	/* NOTE(review): disk_unbusy() is skipped whenever b_resid is
         	 * non-zero, while rf_DispatchKernelIO() calls disk_busy() for
         	 * every request -- confirm the two stay balanced. */
    1797 	if (bp->b_resid == 0) {
    1798 		/* XXX is this the right place for a disk_unbusy()??!??!?!? */
    1799 		disk_unbusy(&rs->sc_dkdev, (bp->b_bcount - bp->b_resid));
    1800 	}
    1801 
    1802 	rf_DiskIOComplete(queue, req, (bp->b_flags & B_ERROR) ? 1 : 0);
    1803 	(req->CompleteFunc) (req->argument, (bp->b_flags & B_ERROR) ? 1 : 0);
    1804 
    1805 	splx(s);
    1806 }
   1807 
   1808 
   1809 
   1810 /*
   1811  * initialize a buf structure for doing an I/O in the kernel.
   1812  */
   1813 static void
   1814 InitBP(
   1815     struct buf * bp,
   1816     struct vnode * b_vp,
   1817     unsigned rw_flag,
   1818     dev_t dev,
   1819     RF_SectorNum_t startSect,
   1820     RF_SectorCount_t numSect,
   1821     caddr_t buf,
   1822     void (*cbFunc) (struct buf *),
   1823     void *cbArg,
   1824     int logBytesPerSector,
   1825     struct proc * b_proc)
   1826 {
   1827 	/* bp->b_flags       = B_PHYS | rw_flag; */
   1828 	bp->b_flags = B_CALL | rw_flag;	/* XXX need B_PHYS here too??? */
   1829 	bp->b_bcount = numSect << logBytesPerSector;
   1830 	bp->b_bufsize = bp->b_bcount;
   1831 	bp->b_error = 0;
   1832 	bp->b_dev = dev;
   1833 	bp->b_un.b_addr = buf;
   1834 	bp->b_blkno = startSect;
   1835 	bp->b_resid = bp->b_bcount;	/* XXX is this right!??!?!! */
   1836 	if (bp->b_bcount == 0) {
   1837 		panic("bp->b_bcount is zero in InitBP!!\n");
   1838 	}
   1839 	bp->b_proc = b_proc;
   1840 	bp->b_iodone = cbFunc;
   1841 	bp->b_vp = b_vp;
   1842 
   1843 }
   1844 
   1845 static void
   1846 raidgetdefaultlabel(raidPtr, rs, lp)
   1847 	RF_Raid_t *raidPtr;
   1848 	struct raid_softc *rs;
   1849 	struct disklabel *lp;
   1850 {
   1851 	db1_printf(("Building a default label...\n"));
   1852 	bzero(lp, sizeof(*lp));
   1853 
   1854 	/* fabricate a label... */
   1855 	lp->d_secperunit = raidPtr->totalSectors;
   1856 	lp->d_secsize = raidPtr->bytesPerSector;
   1857 	lp->d_nsectors = raidPtr->Layout.dataSectorsPerStripe;
   1858 	lp->d_ntracks = 1;
   1859 	lp->d_ncylinders = raidPtr->totalSectors /
   1860 		(lp->d_nsectors * lp->d_ntracks);
   1861 	lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
   1862 
   1863 	strncpy(lp->d_typename, "raid", sizeof(lp->d_typename));
   1864 	lp->d_type = DTYPE_RAID;
   1865 	strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
   1866 	lp->d_rpm = 3600;
   1867 	lp->d_interleave = 1;
   1868 	lp->d_flags = 0;
   1869 
   1870 	lp->d_partitions[RAW_PART].p_offset = 0;
   1871 	lp->d_partitions[RAW_PART].p_size = raidPtr->totalSectors;
   1872 	lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
   1873 	lp->d_npartitions = RAW_PART + 1;
   1874 
   1875 	lp->d_magic = DISKMAGIC;
   1876 	lp->d_magic2 = DISKMAGIC;
   1877 	lp->d_checksum = dkcksum(rs->sc_dkdev.dk_label);
   1878 
   1879 }
   1880 /*
   1881  * Read the disklabel from the raid device.  If one is not present, fake one
   1882  * up.
   1883  */
   1884 static void
   1885 raidgetdisklabel(dev)
   1886 	dev_t   dev;
   1887 {
   1888 	int     unit = raidunit(dev);
   1889 	struct raid_softc *rs = &raid_softc[unit];
   1890 	char   *errstring;
   1891 	struct disklabel *lp = rs->sc_dkdev.dk_label;
   1892 	struct cpu_disklabel *clp = rs->sc_dkdev.dk_cpulabel;
   1893 	RF_Raid_t *raidPtr;
   1894 
   1895 	db1_printf(("Getting the disklabel...\n"));
   1896 
   1897 	bzero(clp, sizeof(*clp));
   1898 
   1899 	raidPtr = raidPtrs[unit];
   1900 
   1901 	raidgetdefaultlabel(raidPtr, rs, lp);
   1902 
   1903 	/*
   1904 	 * Call the generic disklabel extraction routine.
   1905 	 */
   1906 	errstring = readdisklabel(RAIDLABELDEV(dev), raidstrategy,
   1907 	    rs->sc_dkdev.dk_label, rs->sc_dkdev.dk_cpulabel);
   1908 	if (errstring)
   1909 		raidmakedisklabel(rs);
   1910 	else {
   1911 		int     i;
   1912 		struct partition *pp;
   1913 
   1914 		/*
   1915 		 * Sanity check whether the found disklabel is valid.
   1916 		 *
   1917 		 * This is necessary since total size of the raid device
   1918 		 * may vary when an interleave is changed even though exactly
   1919 		 * same componets are used, and old disklabel may used
   1920 		 * if that is found.
   1921 		 */
   1922 		if (lp->d_secperunit != rs->sc_size)
   1923 			printf("WARNING: %s: "
   1924 			    "total sector size in disklabel (%d) != "
   1925 			    "the size of raid (%ld)\n", rs->sc_xname,
   1926 			    lp->d_secperunit, (long) rs->sc_size);
   1927 		for (i = 0; i < lp->d_npartitions; i++) {
   1928 			pp = &lp->d_partitions[i];
   1929 			if (pp->p_offset + pp->p_size > rs->sc_size)
   1930 				printf("WARNING: %s: end of partition `%c' "
   1931 				    "exceeds the size of raid (%ld)\n",
   1932 				    rs->sc_xname, 'a' + i, (long) rs->sc_size);
   1933 		}
   1934 	}
   1935 
   1936 }
   1937 /*
   1938  * Take care of things one might want to take care of in the event
   1939  * that a disklabel isn't present.
   1940  */
   1941 static void
   1942 raidmakedisklabel(rs)
   1943 	struct raid_softc *rs;
   1944 {
   1945 	struct disklabel *lp = rs->sc_dkdev.dk_label;
   1946 	db1_printf(("Making a label..\n"));
   1947 
   1948 	/*
   1949 	 * For historical reasons, if there's no disklabel present
   1950 	 * the raw partition must be marked FS_BSDFFS.
   1951 	 */
   1952 
   1953 	lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;
   1954 
   1955 	strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));
   1956 
   1957 	lp->d_checksum = dkcksum(lp);
   1958 }
   1959 /*
   1960  * Lookup the provided name in the filesystem.  If the file exists,
   1961  * is a valid block device, and isn't being used by anyone else,
   1962  * set *vpp to the file's vnode.
   1963  * You'll find the original of this in ccd.c
   1964  */
   1965 int
   1966 raidlookup(path, p, vpp)
   1967 	char   *path;
   1968 	struct proc *p;
   1969 	struct vnode **vpp;	/* result */
   1970 {
   1971 	struct nameidata nd;
   1972 	struct vnode *vp;
   1973 	struct vattr va;
   1974 	int     error;
   1975 
   1976 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, path, p);
   1977 	if ((error = vn_open(&nd, FREAD | FWRITE, 0)) != 0) {
   1978 #ifdef DEBUG
   1979 		printf("RAIDframe: vn_open returned %d\n", error);
   1980 #endif
   1981 		return (error);
   1982 	}
   1983 	vp = nd.ni_vp;
   1984 	if (vp->v_usecount > 1) {
   1985 		VOP_UNLOCK(vp, 0);
   1986 		(void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
   1987 		return (EBUSY);
   1988 	}
   1989 	if ((error = VOP_GETATTR(vp, &va, p->p_ucred, p)) != 0) {
   1990 		VOP_UNLOCK(vp, 0);
   1991 		(void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
   1992 		return (error);
   1993 	}
   1994 	/* XXX: eventually we should handle VREG, too. */
   1995 	if (va.va_type != VBLK) {
   1996 		VOP_UNLOCK(vp, 0);
   1997 		(void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
   1998 		return (ENOTBLK);
   1999 	}
   2000 	VOP_UNLOCK(vp, 0);
   2001 	*vpp = vp;
   2002 	return (0);
   2003 }
   2004 /*
   2005  * Wait interruptibly for an exclusive lock.
   2006  *
   2007  * XXX
   2008  * Several drivers do this; it should be abstracted and made MP-safe.
   2009  * (Hmm... where have we seen this warning before :->  GO )
   2010  */
   2011 static int
   2012 raidlock(rs)
   2013 	struct raid_softc *rs;
   2014 {
   2015 	int     error;
   2016 
   2017 	while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
   2018 		rs->sc_flags |= RAIDF_WANTED;
   2019 		if ((error =
   2020 			tsleep(rs, PRIBIO | PCATCH, "raidlck", 0)) != 0)
   2021 			return (error);
   2022 	}
   2023 	rs->sc_flags |= RAIDF_LOCKED;
   2024 	return (0);
   2025 }
   2026 /*
   2027  * Unlock and wake up any waiters.
   2028  */
   2029 static void
   2030 raidunlock(rs)
   2031 	struct raid_softc *rs;
   2032 {
   2033 
   2034 	rs->sc_flags &= ~RAIDF_LOCKED;
   2035 	if ((rs->sc_flags & RAIDF_WANTED) != 0) {
   2036 		rs->sc_flags &= ~RAIDF_WANTED;
   2037 		wakeup(rs);
   2038 	}
   2039 }
   2040 
   2041 
   2042 #define RF_COMPONENT_INFO_OFFSET  16384 /* bytes */
   2043 #define RF_COMPONENT_INFO_SIZE     1024 /* bytes */
   2044 
   2045 int
   2046 raidmarkclean(dev_t dev, struct vnode *b_vp, int mod_counter)
   2047 {
   2048 	RF_ComponentLabel_t clabel;
   2049 	raidread_component_label(dev, b_vp, &clabel);
   2050 	clabel.mod_counter = mod_counter;
   2051 	clabel.clean = RF_RAID_CLEAN;
   2052 	raidwrite_component_label(dev, b_vp, &clabel);
   2053 	return(0);
   2054 }
   2055 
   2056 
   2057 int
   2058 raidmarkdirty(dev_t dev, struct vnode *b_vp, int mod_counter)
   2059 {
   2060 	RF_ComponentLabel_t clabel;
   2061 	raidread_component_label(dev, b_vp, &clabel);
   2062 	clabel.mod_counter = mod_counter;
   2063 	clabel.clean = RF_RAID_DIRTY;
   2064 	raidwrite_component_label(dev, b_vp, &clabel);
   2065 	return(0);
   2066 }
   2067 
   2068 /* ARGSUSED */
   2069 int
   2070 raidread_component_label(dev, b_vp, clabel)
   2071 	dev_t dev;
   2072 	struct vnode *b_vp;
   2073 	RF_ComponentLabel_t *clabel;
   2074 {
   2075 	struct buf *bp;
   2076 	int error;
   2077 
   2078 	/* XXX should probably ensure that we don't try to do this if
   2079 	   someone has changed rf_protected_sectors. */
   2080 
   2081 	/* get a block of the appropriate size... */
   2082 	bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
   2083 	bp->b_dev = dev;
   2084 
   2085 	/* get our ducks in a row for the read */
   2086 	bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
   2087 	bp->b_bcount = RF_COMPONENT_INFO_SIZE;
   2088 	bp->b_flags = B_BUSY | B_READ;
   2089  	bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
   2090 
   2091 	(*bdevsw[major(bp->b_dev)].d_strategy)(bp);
   2092 
   2093 	error = biowait(bp);
   2094 
   2095 	if (!error) {
   2096 		memcpy(clabel, bp->b_un.b_addr,
   2097 		       sizeof(RF_ComponentLabel_t));
   2098 #if 0
   2099 		print_component_label( clabel );
   2100 #endif
   2101         } else {
   2102 #if 0
   2103 		printf("Failed to read RAID component label!\n");
   2104 #endif
   2105 	}
   2106 
   2107         bp->b_flags = B_INVAL | B_AGE;
   2108 	brelse(bp);
   2109 	return(error);
   2110 }
   2111 /* ARGSUSED */
   2112 int
   2113 raidwrite_component_label(dev, b_vp, clabel)
   2114 	dev_t dev;
   2115 	struct vnode *b_vp;
   2116 	RF_ComponentLabel_t *clabel;
   2117 {
   2118 	struct buf *bp;
   2119 	int error;
   2120 
   2121 	/* get a block of the appropriate size... */
   2122 	bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
   2123 	bp->b_dev = dev;
   2124 
   2125 	/* get our ducks in a row for the write */
   2126 	bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
   2127 	bp->b_bcount = RF_COMPONENT_INFO_SIZE;
   2128 	bp->b_flags = B_BUSY | B_WRITE;
   2129  	bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
   2130 
   2131 	memset( bp->b_un.b_addr, 0, RF_COMPONENT_INFO_SIZE );
   2132 
   2133 	memcpy( bp->b_un.b_addr, clabel, sizeof(RF_ComponentLabel_t));
   2134 
   2135 	(*bdevsw[major(bp->b_dev)].d_strategy)(bp);
   2136 	error = biowait(bp);
   2137         bp->b_flags = B_INVAL | B_AGE;
   2138 	brelse(bp);
   2139 	if (error) {
   2140 #if 1
   2141 		printf("Failed to write RAID component info!\n");
   2142 #endif
   2143 	}
   2144 
   2145 	return(error);
   2146 }
   2147 
   2148 void
   2149 rf_markalldirty( raidPtr )
   2150 	RF_Raid_t *raidPtr;
   2151 {
   2152 	RF_ComponentLabel_t clabel;
   2153 	int r,c;
   2154 
   2155 	raidPtr->mod_counter++;
   2156 	for (r = 0; r < raidPtr->numRow; r++) {
   2157 		for (c = 0; c < raidPtr->numCol; c++) {
   2158 			if (raidPtr->Disks[r][c].status != rf_ds_failed) {
   2159 				raidread_component_label(
   2160 					raidPtr->Disks[r][c].dev,
   2161 					raidPtr->raid_cinfo[r][c].ci_vp,
   2162 					&clabel);
   2163 				if (clabel.status == rf_ds_spared) {
   2164 					/* XXX do something special...
   2165 					 but whatever you do, don't
   2166 					 try to access it!! */
   2167 				} else {
   2168 #if 0
   2169 				clabel.status =
   2170 					raidPtr->Disks[r][c].status;
   2171 				raidwrite_component_label(
   2172 					raidPtr->Disks[r][c].dev,
   2173 					raidPtr->raid_cinfo[r][c].ci_vp,
   2174 					&clabel);
   2175 #endif
   2176 				raidmarkdirty(
   2177 				       raidPtr->Disks[r][c].dev,
   2178 				       raidPtr->raid_cinfo[r][c].ci_vp,
   2179 				       raidPtr->mod_counter);
   2180 				}
   2181 			}
   2182 		}
   2183 	}
   2184 	/* printf("Component labels marked dirty.\n"); */
   2185 #if 0
   2186 	for( c = 0; c < raidPtr->numSpare ; c++) {
   2187 		sparecol = raidPtr->numCol + c;
   2188 		if (raidPtr->Disks[r][sparecol].status == rf_ds_used_spare) {
   2189 			/*
   2190 
   2191 			   XXX this is where we get fancy and map this spare
   2192 			   into it's correct spot in the array.
   2193 
   2194 			 */
   2195 			/*
   2196 
   2197 			   we claim this disk is "optimal" if it's
   2198 			   rf_ds_used_spare, as that means it should be
   2199 			   directly substitutable for the disk it replaced.
   2200 			   We note that too...
   2201 
   2202 			 */
   2203 
   2204 			for(i=0;i<raidPtr->numRow;i++) {
   2205 				for(j=0;j<raidPtr->numCol;j++) {
   2206 					if ((raidPtr->Disks[i][j].spareRow ==
   2207 					     r) &&
   2208 					    (raidPtr->Disks[i][j].spareCol ==
   2209 					     sparecol)) {
   2210 						srow = r;
   2211 						scol = sparecol;
   2212 						break;
   2213 					}
   2214 				}
   2215 			}
   2216 
   2217 			raidread_component_label(
   2218 				      raidPtr->Disks[r][sparecol].dev,
   2219 				      raidPtr->raid_cinfo[r][sparecol].ci_vp,
   2220 				      &clabel);
   2221 			/* make sure status is noted */
   2222 			clabel.version = RF_COMPONENT_LABEL_VERSION;
   2223 			clabel.mod_counter = raidPtr->mod_counter;
   2224 			clabel.serial_number = raidPtr->serial_number;
   2225 			clabel.row = srow;
   2226 			clabel.column = scol;
   2227 			clabel.num_rows = raidPtr->numRow;
   2228 			clabel.num_columns = raidPtr->numCol;
   2229 			clabel.clean = RF_RAID_DIRTY; /* changed in a bit*/
   2230 			clabel.status = rf_ds_optimal;
   2231 			raidwrite_component_label(
   2232 				      raidPtr->Disks[r][sparecol].dev,
   2233 				      raidPtr->raid_cinfo[r][sparecol].ci_vp,
   2234 				      &clabel);
   2235 			raidmarkclean( raidPtr->Disks[r][sparecol].dev,
   2236 			              raidPtr->raid_cinfo[r][sparecol].ci_vp);
   2237 		}
   2238 	}
   2239 
   2240 #endif
   2241 }
   2242 
   2243 
   2244 void
   2245 rf_update_component_labels( raidPtr )
   2246 	RF_Raid_t *raidPtr;
   2247 {
   2248 	RF_ComponentLabel_t clabel;
   2249 	int sparecol;
   2250 	int r,c;
   2251 	int i,j;
   2252 	int srow, scol;
   2253 
   2254 	srow = -1;
   2255 	scol = -1;
   2256 
   2257 	/* XXX should do extra checks to make sure things really are clean,
   2258 	   rather than blindly setting the clean bit... */
   2259 
   2260 	raidPtr->mod_counter++;
   2261 
   2262 	for (r = 0; r < raidPtr->numRow; r++) {
   2263 		for (c = 0; c < raidPtr->numCol; c++) {
   2264 			if (raidPtr->Disks[r][c].status == rf_ds_optimal) {
   2265 				raidread_component_label(
   2266 					raidPtr->Disks[r][c].dev,
   2267 					raidPtr->raid_cinfo[r][c].ci_vp,
   2268 					&clabel);
   2269 				/* make sure status is noted */
   2270 				clabel.status = rf_ds_optimal;
   2271 				/* bump the counter */
   2272 				clabel.mod_counter = raidPtr->mod_counter;
   2273 #if 0
   2274 				/* note where this set was configured last */
   2275 				clabel.last_unit = raidPtr->raidid;
   2276 #endif
   2277 
   2278 				raidwrite_component_label(
   2279 					raidPtr->Disks[r][c].dev,
   2280 					raidPtr->raid_cinfo[r][c].ci_vp,
   2281 					&clabel);
   2282 				if (raidPtr->parity_good == RF_RAID_CLEAN) {
   2283 					raidmarkclean(
   2284 					      raidPtr->Disks[r][c].dev,
   2285 					      raidPtr->raid_cinfo[r][c].ci_vp,
   2286 					      raidPtr->mod_counter);
   2287 				}
   2288 			}
   2289 			/* else we don't touch it.. */
   2290 #if 0
   2291 			else if (raidPtr->Disks[r][c].status !=
   2292 				   rf_ds_failed) {
   2293 				raidread_component_label(
   2294 					raidPtr->Disks[r][c].dev,
   2295 					raidPtr->raid_cinfo[r][c].ci_vp,
   2296 					&clabel);
   2297 				/* make sure status is noted */
   2298 				clabel.status =
   2299 					raidPtr->Disks[r][c].status;
   2300 				raidwrite_component_label(
   2301 					raidPtr->Disks[r][c].dev,
   2302 					raidPtr->raid_cinfo[r][c].ci_vp,
   2303 					&clabel);
   2304 				if (raidPtr->parity_good == RF_RAID_CLEAN) {
   2305 					raidmarkclean(
   2306 					      raidPtr->Disks[r][c].dev,
   2307 					      raidPtr->raid_cinfo[r][c].ci_vp,
   2308 					      raidPtr->mod_counter);
   2309 				}
   2310 			}
   2311 #endif
   2312 		}
   2313 	}
   2314 
   2315 	for( c = 0; c < raidPtr->numSpare ; c++) {
   2316 		sparecol = raidPtr->numCol + c;
   2317 		if (raidPtr->Disks[0][sparecol].status == rf_ds_used_spare) {
   2318 			/*
   2319 
   2320 			   we claim this disk is "optimal" if it's
   2321 			   rf_ds_used_spare, as that means it should be
   2322 			   directly substitutable for the disk it replaced.
   2323 			   We note that too...
   2324 
   2325 			 */
   2326 
   2327 			for(i=0;i<raidPtr->numRow;i++) {
   2328 				for(j=0;j<raidPtr->numCol;j++) {
   2329 					if ((raidPtr->Disks[i][j].spareRow ==
   2330 					     0) &&
   2331 					    (raidPtr->Disks[i][j].spareCol ==
   2332 					     sparecol)) {
   2333 						srow = i;
   2334 						scol = j;
   2335 						break;
   2336 					}
   2337 				}
   2338 			}
   2339 
   2340 			/* XXX shouldn't *really* need this... */
   2341 			raidread_component_label(
   2342 				      raidPtr->Disks[0][sparecol].dev,
   2343 				      raidPtr->raid_cinfo[0][sparecol].ci_vp,
   2344 				      &clabel);
   2345 			/* make sure status is noted */
   2346 
   2347 			raid_init_component_label(raidPtr, &clabel);
   2348 
   2349 			clabel.mod_counter = raidPtr->mod_counter;
   2350 			clabel.row = srow;
   2351 			clabel.column = scol;
   2352 			clabel.status = rf_ds_optimal;
   2353 
   2354 			raidwrite_component_label(
   2355 				      raidPtr->Disks[0][sparecol].dev,
   2356 				      raidPtr->raid_cinfo[0][sparecol].ci_vp,
   2357 				      &clabel);
   2358 			if (raidPtr->parity_good == RF_RAID_CLEAN) {
   2359 				raidmarkclean( raidPtr->Disks[0][sparecol].dev,
   2360 			              raidPtr->raid_cinfo[0][sparecol].ci_vp,
   2361 					       raidPtr->mod_counter);
   2362 			}
   2363 		}
   2364 	}
   2365 	/* 	printf("Component labels updated\n"); */
   2366 }
   2367 
   2368 void
   2369 rf_ReconThread(req)
   2370 	struct rf_recon_req *req;
   2371 {
   2372 	int     s;
   2373 	RF_Raid_t *raidPtr;
   2374 
   2375 	s = splbio();
   2376 	raidPtr = (RF_Raid_t *) req->raidPtr;
   2377 	raidPtr->recon_in_progress = 1;
   2378 
   2379 	rf_FailDisk((RF_Raid_t *) req->raidPtr, req->row, req->col,
   2380 		    ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));
   2381 
   2382 	/* XXX get rid of this! we don't need it at all.. */
   2383 	RF_Free(req, sizeof(*req));
   2384 
   2385 	raidPtr->recon_in_progress = 0;
   2386 	splx(s);
   2387 
   2388 	/* That's all... */
   2389 	kthread_exit(0);        /* does not return */
   2390 }
   2391 
   2392 void
   2393 rf_RewriteParityThread(raidPtr)
   2394 	RF_Raid_t *raidPtr;
   2395 {
   2396 	int retcode;
   2397 	int s;
   2398 
   2399 	raidPtr->parity_rewrite_in_progress = 1;
   2400 	s = splbio();
   2401 	retcode = rf_RewriteParity(raidPtr);
   2402 	splx(s);
   2403 	if (retcode) {
   2404 		printf("raid%d: Error re-writing parity!\n",raidPtr->raidid);
   2405 	} else {
   2406 		/* set the clean bit!  If we shutdown correctly,
   2407 		   the clean bit on each component label will get
   2408 		   set */
   2409 		raidPtr->parity_good = RF_RAID_CLEAN;
   2410 	}
   2411 	raidPtr->parity_rewrite_in_progress = 0;
   2412 
   2413 	/* That's all... */
   2414 	kthread_exit(0);        /* does not return */
   2415 }
   2416 
   2417 
   2418 void
   2419 rf_CopybackThread(raidPtr)
   2420 	RF_Raid_t *raidPtr;
   2421 {
   2422 	int s;
   2423 
   2424 	raidPtr->copyback_in_progress = 1;
   2425 	s = splbio();
   2426 	rf_CopybackReconstructedData(raidPtr);
   2427 	splx(s);
   2428 	raidPtr->copyback_in_progress = 0;
   2429 
   2430 	/* That's all... */
   2431 	kthread_exit(0);        /* does not return */
   2432 }
   2433 
   2434 
   2435 void
   2436 rf_ReconstructInPlaceThread(req)
   2437 	struct rf_recon_req *req;
   2438 {
   2439 	int retcode;
   2440 	int s;
   2441 	RF_Raid_t *raidPtr;
   2442 
   2443 	s = splbio();
   2444 	raidPtr = req->raidPtr;
   2445 	raidPtr->recon_in_progress = 1;
   2446 	retcode = rf_ReconstructInPlace(raidPtr, req->row, req->col);
   2447 	RF_Free(req, sizeof(*req));
   2448 	raidPtr->recon_in_progress = 0;
   2449 	splx(s);
   2450 
   2451 	/* That's all... */
   2452 	kthread_exit(0);        /* does not return */
   2453 }
   2454 
/*
 * Mount-root hook.  Nothing needs doing yet, so this is an empty
 * placeholder; `dev' is not referenced.
 */
void
rf_mountroot_hook(dev)
	struct device *dev;
{
	/* intentionally empty */
}
   2461 
   2462 
   2463 RF_AutoConfig_t *
   2464 rf_find_raid_components()
   2465 {
   2466 	struct devnametobdevmaj *dtobdm;
   2467 	struct vnode *vp;
   2468 	struct disklabel label;
   2469 	struct device *dv;
   2470 	char *cd_name;
   2471 	dev_t dev;
   2472 	int error;
   2473 	int i;
   2474 	int good_one;
   2475 	RF_ComponentLabel_t *clabel;
   2476 	RF_AutoConfig_t *ac_list;
   2477 	RF_AutoConfig_t *ac;
   2478 
   2479 
   2480 	/* initialize the AutoConfig list */
   2481 	ac_list = NULL;
   2482 
   2483 if (raidautoconfig) {
   2484 
   2485 	/* we begin by trolling through *all* the devices on the system */
   2486 
   2487 	for (dv = alldevs.tqh_first; dv != NULL;
   2488 	     dv = dv->dv_list.tqe_next) {
   2489 
   2490 		/* we are only interested in disks... */
   2491 		if (dv->dv_class != DV_DISK)
   2492 			continue;
   2493 
   2494 		/* we don't care about floppies... */
   2495 		if (!strcmp(dv->dv_cfdata->cf_driver->cd_name,"fd")) {
   2496 			continue;
   2497 		}
   2498 
   2499 		/* need to find the device_name_to_block_device_major stuff */
   2500 		cd_name = dv->dv_cfdata->cf_driver->cd_name;
   2501 		dtobdm = dev_name2blk;
   2502 		while (dtobdm->d_name && strcmp(dtobdm->d_name, cd_name)) {
   2503 			dtobdm++;
   2504 		}
   2505 
   2506 		/* get a vnode for the raw partition of this disk */
   2507 
   2508 		dev = MAKEDISKDEV(dtobdm->d_maj, dv->dv_unit, RAW_PART);
   2509 		if (bdevvp(dev, &vp))
   2510 			panic("RAID can't alloc vnode");
   2511 
   2512 		error = VOP_OPEN(vp, FREAD, NOCRED, 0);
   2513 
   2514 		if (error) {
   2515 			/* "Who cares."  Continue looking
   2516 			   for something that exists*/
   2517 			vput(vp);
   2518 			continue;
   2519 		}
   2520 
   2521 		/* Ok, the disk exists.  Go get the disklabel. */
   2522 		error = VOP_IOCTL(vp, DIOCGDINFO, (caddr_t)&label,
   2523 				  FREAD, NOCRED, 0);
   2524 		if (error) {
   2525 			/*
   2526 			 * XXX can't happen - open() would
   2527 			 * have errored out (or faked up one)
   2528 			 */
   2529 			printf("can't get label for dev %s%c (%d)!?!?\n",
   2530 			       dv->dv_xname, 'a' + RAW_PART, error);
   2531 		}
   2532 
   2533 		/* don't need this any more.  We'll allocate it again
   2534 		   a little later if we really do... */
   2535 		VOP_CLOSE(vp, FREAD, NOCRED, 0);
   2536 		vput(vp);
   2537 
   2538 		for (i=0; i < label.d_npartitions; i++) {
   2539 			/* We only support partitions marked as RAID */
   2540 			if (label.d_partitions[i].p_fstype != FS_RAID)
   2541 				continue;
   2542 
   2543 			dev = MAKEDISKDEV(dtobdm->d_maj, dv->dv_unit, i);
   2544 			if (bdevvp(dev, &vp))
   2545 				panic("RAID can't alloc vnode");
   2546 
   2547 			error = VOP_OPEN(vp, FREAD, NOCRED, 0);
   2548 			if (error) {
   2549 				/* Whatever... */
   2550 				vput(vp);
   2551 				continue;
   2552 			}
   2553 
   2554 			good_one = 0;
   2555 
   2556 			clabel = (RF_ComponentLabel_t *)
   2557 				malloc(sizeof(RF_ComponentLabel_t),
   2558 				       M_RAIDFRAME, M_NOWAIT);
   2559 			if (clabel == NULL) {
   2560 				/* XXX CLEANUP HERE */
   2561 				printf("RAID auto config: out of memory!\n");
   2562 				return(NULL); /* XXX probably should panic? */
   2563 			}
   2564 
   2565 			if (!raidread_component_label(dev, vp, clabel)) {
   2566 				/* Got the label.  Does it look reasonable? */
   2567 				if (rf_reasonable_label(clabel) &&
   2568 				    (clabel->partitionSize <=
   2569 				     label.d_partitions[i].p_size)) {
   2570 #if DEBUG
   2571 					printf("Component on: %s%c: %d\n",
   2572 					       dv->dv_xname, 'a'+i,
   2573 					       label.d_partitions[i].p_size);
   2574 					print_component_label(clabel);
   2575 #endif
   2576 					/* if it's reasonable, add it,
   2577 					   else ignore it. */
   2578 					ac = (RF_AutoConfig_t *)
   2579 						malloc(sizeof(RF_AutoConfig_t),
   2580 						       M_RAIDFRAME,
   2581 						       M_NOWAIT);
   2582 					if (ac == NULL) {
   2583 						/* XXX should panic?? */
   2584 						return(NULL);
   2585 					}
   2586 
   2587 					sprintf(ac->devname, "%s%c",
   2588 						dv->dv_xname, 'a'+i);
   2589 					ac->dev = dev;
   2590 					ac->vp = vp;
   2591 					ac->clabel = clabel;
   2592 					ac->next = ac_list;
   2593 					ac_list = ac;
   2594 					good_one = 1;
   2595 				}
   2596 			}
   2597 			if (!good_one) {
   2598 				/* cleanup */
   2599 				free(clabel, M_RAIDFRAME);
   2600 				VOP_CLOSE(vp, FREAD, NOCRED, 0);
   2601 				vput(vp);
   2602 			}
   2603 		}
   2604 	}
   2605 }
   2606 return(ac_list);
   2607 }
   2608 
   2609 static int
   2610 rf_reasonable_label(clabel)
   2611 	RF_ComponentLabel_t *clabel;
   2612 {
   2613 
   2614 	if (((clabel->version==RF_COMPONENT_LABEL_VERSION_1) ||
   2615 	     (clabel->version==RF_COMPONENT_LABEL_VERSION)) &&
   2616 	    ((clabel->clean == RF_RAID_CLEAN) ||
   2617 	     (clabel->clean == RF_RAID_DIRTY)) &&
   2618 	    clabel->row >=0 &&
   2619 	    clabel->column >= 0 &&
   2620 	    clabel->num_rows > 0 &&
   2621 	    clabel->num_columns > 0 &&
   2622 	    clabel->row < clabel->num_rows &&
   2623 	    clabel->column < clabel->num_columns &&
   2624 	    clabel->blockSize > 0 &&
   2625 	    clabel->numBlocks > 0) {
   2626 		/* label looks reasonable enough... */
   2627 		return(1);
   2628 	}
   2629 	return(0);
   2630 }
   2631 
   2632 
   2633 void
   2634 print_component_label(clabel)
   2635 	RF_ComponentLabel_t *clabel;
   2636 {
   2637 	printf("   Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
   2638 	       clabel->row, clabel->column,
   2639 	       clabel->num_rows, clabel->num_columns);
   2640 	printf("   Version: %d Serial Number: %d Mod Counter: %d\n",
   2641 	       clabel->version, clabel->serial_number,
   2642 	       clabel->mod_counter);
   2643 	printf("   Clean: %s Status: %d\n",
   2644 	       clabel->clean ? "Yes" : "No", clabel->status );
   2645 	printf("   sectPerSU: %d SUsPerPU: %d SUsPerRU: %d\n",
   2646 	       clabel->sectPerSU, clabel->SUsPerPU, clabel->SUsPerRU);
   2647 	printf("   RAID Level: %c  blocksize: %d numBlocks: %d\n",
   2648 	       (char) clabel->parityConfig, clabel->blockSize,
   2649 	       clabel->numBlocks);
   2650 	printf("   Autoconfig: %s\n", clabel->autoconfigure ? "Yes" : "No" );
   2651 	printf("   Last configured as: raid%d\n", clabel->last_unit );
   2652 #if 0
   2653 	   printf("   Config order: %d\n", clabel->config_order);
   2654 #endif
   2655 
   2656 }
   2657 
   2658 RF_ConfigSet_t *
   2659 rf_create_auto_sets(ac_list)
   2660 	RF_AutoConfig_t *ac_list;
   2661 {
   2662 	RF_AutoConfig_t *ac;
   2663 	RF_ConfigSet_t *config_sets;
   2664 	RF_ConfigSet_t *cset;
   2665 	RF_AutoConfig_t *ac_next;
   2666 
   2667 
   2668 	config_sets = NULL;
   2669 
   2670 	/* Go through the AutoConfig list, and figure out which components
   2671 	   belong to what sets.  */
   2672 	ac = ac_list;
   2673 	while(ac!=NULL) {
   2674 		/* we're going to putz with ac->next, so save it here
   2675 		   for use at the end of the loop */
   2676 		ac_next = ac->next;
   2677 
   2678 		if (config_sets == NULL) {
   2679 			/* will need at least this one... */
   2680 			config_sets = (RF_ConfigSet_t *)
   2681 				malloc(sizeof(RF_ConfigSet_t),
   2682 				       M_RAIDFRAME, M_NOWAIT);
   2683 			if (config_sets == NULL) {
   2684 				panic("rf_create_auto_sets: No memory!\n");
   2685 			}
   2686 			/* this one is easy :) */
   2687 			config_sets->ac = ac;
   2688 			config_sets->next = NULL;
   2689 			config_sets->rootable = 0;
   2690 			ac->next = NULL;
   2691 		} else {
   2692 			/* which set does this component fit into? */
   2693 			cset = config_sets;
   2694 			while(cset!=NULL) {
   2695 				if (rf_does_it_fit(cset, ac)) {
   2696 					/* looks like it matches */
   2697 					ac->next = cset->ac;
   2698 					cset->ac = ac;
   2699 					break;
   2700 				}
   2701 				cset = cset->next;
   2702 			}
   2703 			if (cset==NULL) {
   2704 				/* didn't find a match above... new set..*/
   2705 				cset = (RF_ConfigSet_t *)
   2706 					malloc(sizeof(RF_ConfigSet_t),
   2707 					       M_RAIDFRAME, M_NOWAIT);
   2708 				if (cset == NULL) {
   2709 					panic("rf_create_auto_sets: No memory!\n");
   2710 				}
   2711 				cset->ac = ac;
   2712 				ac->next = NULL;
   2713 				cset->next = config_sets;
   2714 				cset->rootable = 0;
   2715 				config_sets = cset;
   2716 			}
   2717 		}
   2718 		ac = ac_next;
   2719 	}
   2720 
   2721 
   2722 	return(config_sets);
   2723 }
   2724 
   2725 static int
   2726 rf_does_it_fit(cset, ac)
   2727 	RF_ConfigSet_t *cset;
   2728 	RF_AutoConfig_t *ac;
   2729 {
   2730 	RF_ComponentLabel_t *clabel1, *clabel2;
   2731 
   2732 	/* If this one matches the *first* one in the set, that's good
   2733 	   enough, since the other members of the set would have been
   2734 	   through here too... */
   2735 	/* note that we are not checking partitionSize here..
   2736 
   2737 	   Note that we are also not checking the mod_counters here.
   2738 	   If everything else matches execpt the mod_counter, that's
   2739 	   good enough for this test.  We will deal with the mod_counters
   2740 	   a little later in the autoconfiguration process.
   2741 
   2742 	    (clabel1->mod_counter == clabel2->mod_counter) &&
   2743 
   2744 	*/
   2745 
   2746 	clabel1 = cset->ac->clabel;
   2747 	clabel2 = ac->clabel;
   2748 	if ((clabel1->version == clabel2->version) &&
   2749 	    (clabel1->serial_number == clabel2->serial_number) &&
   2750 	    (clabel1->num_rows == clabel2->num_rows) &&
   2751 	    (clabel1->num_columns == clabel2->num_columns) &&
   2752 	    (clabel1->sectPerSU == clabel2->sectPerSU) &&
   2753 	    (clabel1->SUsPerPU == clabel2->SUsPerPU) &&
   2754 	    (clabel1->SUsPerRU == clabel2->SUsPerRU) &&
   2755 	    (clabel1->parityConfig == clabel2->parityConfig) &&
   2756 	    (clabel1->maxOutstanding == clabel2->maxOutstanding) &&
   2757 	    (clabel1->blockSize == clabel2->blockSize) &&
   2758 	    (clabel1->numBlocks == clabel2->numBlocks) &&
   2759 	    (clabel1->autoconfigure == clabel2->autoconfigure) &&
   2760 	    (clabel1->root_partition == clabel2->root_partition) &&
   2761 	    (clabel1->last_unit == clabel2->last_unit) &&
   2762 	    (clabel1->config_order == clabel2->config_order)) {
   2763 		/* if it get's here, it almost *has* to be a match */
   2764 	} else {
   2765 		/* it's not consistent with somebody in the set..
   2766 		   punt */
   2767 		return(0);
   2768 	}
   2769 	/* all was fine.. it must fit... */
   2770 	return(1);
   2771 }
   2772 
   2773 int
   2774 rf_have_enough_components(cset)
   2775 	RF_ConfigSet_t *cset;
   2776 {
   2777 	RF_AutoConfig_t *ac;
   2778 	RF_AutoConfig_t *auto_config;
   2779 	RF_ComponentLabel_t *clabel;
   2780 	int r,c;
   2781 	int num_rows;
   2782 	int num_cols;
   2783 	int num_missing;
   2784 
   2785 	/* check to see that we have enough 'live' components
   2786 	   of this set.  If so, we can configure it if necessary */
   2787 
   2788 	num_rows = cset->ac->clabel->num_rows;
   2789 	num_cols = cset->ac->clabel->num_columns;
   2790 
   2791 	/* XXX Check for duplicate components!?!?!? */
   2792 
   2793 	num_missing = 0;
   2794 	auto_config = cset->ac;
   2795 
   2796 	for(r=0; r<num_rows; r++) {
   2797 		for(c=0; c<num_cols; c++) {
   2798 			ac = auto_config;
   2799 			while(ac!=NULL) {
   2800 				if (ac->clabel==NULL) {
   2801 					/* big-time bad news. */
   2802 					goto fail;
   2803 				}
   2804 				if ((ac->clabel->row == r) &&
   2805 				    (ac->clabel->column == c)) {
   2806 					/* it's this one... */
   2807 #if DEBUG
   2808 					printf("Found: %s at %d,%d\n",
   2809 					       ac->devname,r,c);
   2810 #endif
   2811 					break;
   2812 				}
   2813 				ac=ac->next;
   2814 			}
   2815 			if (ac==NULL) {
   2816 				/* Didn't find one here! */
   2817 				num_missing++;
   2818 			}
   2819 		}
   2820 	}
   2821 
   2822 	clabel = cset->ac->clabel;
   2823 
   2824 	if (((clabel->parityConfig == '0') && (num_missing > 0)) ||
   2825 	    ((clabel->parityConfig == '1') && (num_missing > 1)) ||
   2826 	    ((clabel->parityConfig == '4') && (num_missing > 1)) ||
   2827 	    ((clabel->parityConfig == '5') && (num_missing > 1))) {
   2828 		/* XXX this needs to be made *much* more general */
   2829 		/* Too many failures */
   2830 		return(0);
   2831 	}
   2832 	/* otherwise, all is well, and we've got enough to take a kick
   2833 	   at autoconfiguring this set */
   2834 	return(1);
   2835 fail:
   2836 	return(0);
   2837 
   2838 }
   2839 
   2840 void
   2841 rf_create_configuration(ac,config,raidPtr)
   2842 	RF_AutoConfig_t *ac;
   2843 	RF_Config_t *config;
   2844 	RF_Raid_t *raidPtr;
   2845 {
   2846 	RF_ComponentLabel_t *clabel;
   2847 
   2848 	clabel = ac->clabel;
   2849 
   2850 	/* 1. Fill in the common stuff */
   2851 	config->numRow = clabel->num_rows;
   2852 	config->numCol = clabel->num_columns;
   2853 	config->numSpare = 0; /* XXX should this be set here? */
   2854 	config->sectPerSU = clabel->sectPerSU;
   2855 	config->SUsPerPU = clabel->SUsPerPU;
   2856 	config->SUsPerRU = clabel->SUsPerRU;
   2857 	config->parityConfig = clabel->parityConfig;
   2858 	/* XXX... */
   2859 	strcpy(config->diskQueueType,"fifo");
   2860 	config->maxOutstandingDiskReqs = clabel->maxOutstanding;
   2861 	config->layoutSpecificSize = 0; /* XXX ?? */
   2862 
   2863 	while(ac!=NULL) {
   2864 		/* row/col values will be in range due to the checks
   2865 		   in reasonable_label() */
   2866 		strcpy(config->devnames[ac->clabel->row][ac->clabel->column],
   2867 		       ac->devname);
   2868 		ac = ac->next;
   2869 	}
   2870 
   2871 }
   2872 
   2873 int
   2874 rf_set_autoconfig(raidPtr, new_value)
   2875 	RF_Raid_t *raidPtr;
   2876 	int new_value;
   2877 {
   2878 	RF_ComponentLabel_t clabel;
   2879 	struct vnode *vp;
   2880 	dev_t dev;
   2881 	int row, column;
   2882 
   2883 	raidPtr->autoconfigure = new_value;
   2884 	for(row=0; row<raidPtr->numRow; row++) {
   2885 		for(column=0; column<raidPtr->numCol; column++) {
   2886 			dev = raidPtr->Disks[row][column].dev;
   2887 			vp = raidPtr->raid_cinfo[row][column].ci_vp;
   2888 			raidread_component_label(dev, vp, &clabel);
   2889 			clabel.autoconfigure = new_value;
   2890 			raidwrite_component_label(dev, vp, &clabel);
   2891 		}
   2892 	}
   2893 	return(new_value);
   2894 }
   2895 
   2896 int
   2897 rf_set_rootpartition(raidPtr, new_value)
   2898 	RF_Raid_t *raidPtr;
   2899 	int new_value;
   2900 {
   2901 	RF_ComponentLabel_t clabel;
   2902 	struct vnode *vp;
   2903 	dev_t dev;
   2904 	int row, column;
   2905 
   2906 	raidPtr->root_partition = new_value;
   2907 	for(row=0; row<raidPtr->numRow; row++) {
   2908 		for(column=0; column<raidPtr->numCol; column++) {
   2909 			dev = raidPtr->Disks[row][column].dev;
   2910 			vp = raidPtr->raid_cinfo[row][column].ci_vp;
   2911 			raidread_component_label(dev, vp, &clabel);
   2912 			clabel.root_partition = new_value;
   2913 			raidwrite_component_label(dev, vp, &clabel);
   2914 		}
   2915 	}
   2916 	return(new_value);
   2917 }
   2918 
   2919 void
   2920 rf_release_all_vps(cset)
   2921 	RF_ConfigSet_t *cset;
   2922 {
   2923 	RF_AutoConfig_t *ac;
   2924 
   2925 	ac = cset->ac;
   2926 	while(ac!=NULL) {
   2927 		/* Close the vp, and give it back */
   2928 		if (ac->vp) {
   2929 			VOP_CLOSE(ac->vp, FREAD, NOCRED, 0);
   2930 			vput(ac->vp);
   2931 		}
   2932 		ac = ac->next;
   2933 	}
   2934 }
   2935 
   2936 
   2937 void
   2938 rf_cleanup_config_set(cset)
   2939 	RF_ConfigSet_t *cset;
   2940 {
   2941 	RF_AutoConfig_t *ac;
   2942 	RF_AutoConfig_t *next_ac;
   2943 
   2944 	ac = cset->ac;
   2945 	while(ac!=NULL) {
   2946 		next_ac = ac->next;
   2947 		/* nuke the label */
   2948 		free(ac->clabel, M_RAIDFRAME);
   2949 		/* cleanup the config structure */
   2950 		free(ac, M_RAIDFRAME);
   2951 		/* "next.." */
   2952 		ac = next_ac;
   2953 	}
   2954 	/* and, finally, nuke the config set */
   2955 	free(cset, M_RAIDFRAME);
   2956 }
   2957 
   2958 
   2959 void
   2960 raid_init_component_label(raidPtr, clabel)
   2961 	RF_Raid_t *raidPtr;
   2962 	RF_ComponentLabel_t *clabel;
   2963 {
   2964 	/* current version number */
   2965 	clabel->version = RF_COMPONENT_LABEL_VERSION;
   2966 	clabel->serial_number = raidPtr->serial_number;
   2967 	clabel->mod_counter = raidPtr->mod_counter;
   2968 	clabel->num_rows = raidPtr->numRow;
   2969 	clabel->num_columns = raidPtr->numCol;
   2970 	clabel->clean = RF_RAID_DIRTY; /* not clean */
   2971 	clabel->status = rf_ds_optimal; /* "It's good!" */
   2972 
   2973 	clabel->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit;
   2974 	clabel->SUsPerPU = raidPtr->Layout.SUsPerPU;
   2975 	clabel->SUsPerRU = raidPtr->Layout.SUsPerRU;
   2976 
   2977 	clabel->blockSize = raidPtr->bytesPerSector;
   2978 	clabel->numBlocks = raidPtr->sectorsPerDisk;
   2979 
   2980 	/* XXX not portable */
   2981 	clabel->parityConfig = raidPtr->Layout.map->parityConfig;
   2982 	clabel->maxOutstanding = raidPtr->maxOutstanding;
   2983 	clabel->autoconfigure = raidPtr->autoconfigure;
   2984 	clabel->root_partition = raidPtr->root_partition;
   2985 	clabel->last_unit = raidPtr->raidid;
   2986 	clabel->config_order = raidPtr->config_order;
   2987 }
   2988 
   2989 int
   2990 rf_auto_config_set(cset,unit)
   2991 	RF_ConfigSet_t *cset;
   2992 	int *unit;
   2993 {
   2994 	RF_Raid_t *raidPtr;
   2995 	RF_Config_t *config;
   2996 	int raidID;
   2997 	int retcode;
   2998 
   2999 	printf("Starting autoconfigure on raid%d\n",raidID);
   3000 
   3001 	retcode = 0;
   3002 	*unit = -1;
   3003 
   3004 	/* 1. Create a config structure */
   3005 
   3006 	config = (RF_Config_t *)malloc(sizeof(RF_Config_t),
   3007 				       M_RAIDFRAME,
   3008 				       M_NOWAIT);
   3009 	if (config==NULL) {
   3010 		printf("Out of mem!?!?\n");
   3011 				/* XXX do something more intelligent here. */
   3012 		return(1);
   3013 	}
   3014 	/* XXX raidID needs to be set correctly.. */
   3015 
   3016 	/*
   3017 	   2. Figure out what RAID ID this one is supposed to live at
   3018 	   See if we can get the same RAID dev that it was configured
   3019 	   on last time..
   3020 	*/
   3021 
   3022 	raidID = cset->ac->clabel->last_unit;
   3023 	if ((raidID < 0) || (raidID >= numraid)) {
   3024 		/* let's not wander off into lala land. */
   3025 		raidID = numraid - 1;
   3026 	}
   3027 	if (raidPtrs[raidID]->valid != 0) {
   3028 
   3029 		/*
   3030 		   Nope... Go looking for an alternative...
   3031 		   Start high so we don't immediately use raid0 if that's
   3032 		   not taken.
   3033 		*/
   3034 
   3035 		for(raidID = numraid; raidID >= 0; raidID--) {
   3036 			if (raidPtrs[raidID]->valid == 0) {
   3037 				/* can use this one! */
   3038 				break;
   3039 			}
   3040 		}
   3041 	}
   3042 
   3043 	if (raidID < 0) {
   3044 		/* punt... */
   3045 		printf("Unable to auto configure this set!\n");
   3046 		printf("(Out of RAID devs!)\n");
   3047 		return(1);
   3048 	}
   3049 
   3050 	raidPtr = raidPtrs[raidID];
   3051 
   3052 	/* XXX all this stuff should be done SOMEWHERE ELSE! */
   3053 	raidPtr->raidid = raidID;
   3054 	raidPtr->openings = RAIDOUTSTANDING;
   3055 
   3056 	/* 3. Build the configuration structure */
   3057 	rf_create_configuration(cset->ac, config, raidPtr);
   3058 
   3059 	/* 4. Do the configuration */
   3060 	retcode = rf_Configure(raidPtr, config, cset->ac);
   3061 
   3062 	if (retcode == 0) {
   3063 
   3064 		raidinit(raidPtrs[raidID]);
   3065 
   3066 		rf_markalldirty(raidPtrs[raidID]);
   3067 		raidPtrs[raidID]->autoconfigure = 1; /* XXX do this here? */
   3068 		if (cset->ac->clabel->root_partition==1) {
   3069 			/* everything configured just fine.  Make a note
   3070 			   that this set is eligible to be root. */
   3071 			cset->rootable = 1;
   3072 			/* XXX do this here? */
   3073 			raidPtrs[raidID]->root_partition = 1;
   3074 		}
   3075 	}
   3076 
   3077 	/* 5. Cleanup */
   3078 	free(config, M_RAIDFRAME);
   3079 
   3080 	*unit = raidID;
   3081 	return(retcode);
   3082 }
   3083