Home | History | Annotate | Line # | Download | only in raidframe
rf_netbsdkintf.c revision 1.89
      1 /*	$NetBSD: rf_netbsdkintf.c,v 1.89 2000/06/01 00:50:01 matt Exp $	*/
      2 /*-
      3  * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc.
      4  * All rights reserved.
      5  *
      6  * This code is derived from software contributed to The NetBSD Foundation
      7  * by Greg Oster; Jason R. Thorpe.
      8  *
      9  * Redistribution and use in source and binary forms, with or without
     10  * modification, are permitted provided that the following conditions
     11  * are met:
     12  * 1. Redistributions of source code must retain the above copyright
     13  *    notice, this list of conditions and the following disclaimer.
     14  * 2. Redistributions in binary form must reproduce the above copyright
     15  *    notice, this list of conditions and the following disclaimer in the
     16  *    documentation and/or other materials provided with the distribution.
     17  * 3. All advertising materials mentioning features or use of this software
     18  *    must display the following acknowledgement:
     19  *        This product includes software developed by the NetBSD
     20  *        Foundation, Inc. and its contributors.
     21  * 4. Neither the name of The NetBSD Foundation nor the names of its
     22  *    contributors may be used to endorse or promote products derived
     23  *    from this software without specific prior written permission.
     24  *
     25  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     26  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     27  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     28  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     29  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     30  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     31  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     32  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     33  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     34  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     35  * POSSIBILITY OF SUCH DAMAGE.
     36  */
     37 
     38 /*
     39  * Copyright (c) 1988 University of Utah.
     40  * Copyright (c) 1990, 1993
     41  *      The Regents of the University of California.  All rights reserved.
     42  *
     43  * This code is derived from software contributed to Berkeley by
     44  * the Systems Programming Group of the University of Utah Computer
     45  * Science Department.
     46  *
     47  * Redistribution and use in source and binary forms, with or without
     48  * modification, are permitted provided that the following conditions
     49  * are met:
     50  * 1. Redistributions of source code must retain the above copyright
     51  *    notice, this list of conditions and the following disclaimer.
     52  * 2. Redistributions in binary form must reproduce the above copyright
     53  *    notice, this list of conditions and the following disclaimer in the
     54  *    documentation and/or other materials provided with the distribution.
     55  * 3. All advertising materials mentioning features or use of this software
     56  *    must display the following acknowledgement:
     57  *      This product includes software developed by the University of
     58  *      California, Berkeley and its contributors.
     59  * 4. Neither the name of the University nor the names of its contributors
     60  *    may be used to endorse or promote products derived from this software
     61  *    without specific prior written permission.
     62  *
     63  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     64  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     65  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     66  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     67  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     68  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     69  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     70  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     71  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     72  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     73  * SUCH DAMAGE.
     74  *
     75  * from: Utah $Hdr: cd.c 1.6 90/11/28$
     76  *
     77  *      @(#)cd.c        8.2 (Berkeley) 11/16/93
     78  */
     79 
     80 
     81 
     82 
     83 /*
     84  * Copyright (c) 1995 Carnegie-Mellon University.
     85  * All rights reserved.
     86  *
     87  * Authors: Mark Holland, Jim Zelenka
     88  *
     89  * Permission to use, copy, modify and distribute this software and
     90  * its documentation is hereby granted, provided that both the copyright
     91  * notice and this permission notice appear in all copies of the
     92  * software, derivative works or modified versions, and any portions
     93  * thereof, and that both notices appear in supporting documentation.
     94  *
     95  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
     96  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
     97  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
     98  *
     99  * Carnegie Mellon requests users of this software to return to
    100  *
    101  *  Software Distribution Coordinator  or  Software.Distribution (at) CS.CMU.EDU
    102  *  School of Computer Science
    103  *  Carnegie Mellon University
    104  *  Pittsburgh PA 15213-3890
    105  *
    106  * any improvements or extensions that they make and grant Carnegie the
    107  * rights to redistribute these changes.
    108  */
    109 
    110 /***********************************************************
    111  *
    112  * rf_kintf.c -- the kernel interface routines for RAIDframe
    113  *
    114  ***********************************************************/
    115 
    116 #include <sys/errno.h>
    117 #include <sys/param.h>
    118 #include <sys/pool.h>
    119 #include <sys/queue.h>
    120 #include <sys/disk.h>
    121 #include <sys/device.h>
    122 #include <sys/stat.h>
    123 #include <sys/ioctl.h>
    124 #include <sys/fcntl.h>
    125 #include <sys/systm.h>
    126 #include <sys/namei.h>
    127 #include <sys/vnode.h>
    128 #include <sys/param.h>
    129 #include <sys/types.h>
    130 #include <machine/types.h>
    131 #include <sys/disklabel.h>
    132 #include <sys/conf.h>
    133 #include <sys/lock.h>
    134 #include <sys/buf.h>
    135 #include <sys/user.h>
    136 #include <sys/reboot.h>
    137 
    138 #include "raid.h"
    139 #include "opt_raid_autoconfig.h"
    140 #include "rf_raid.h"
    141 #include "rf_raidframe.h"
    142 #include "rf_copyback.h"
    143 #include "rf_dag.h"
    144 #include "rf_dagflags.h"
    145 #include "rf_diskqueue.h"
    146 #include "rf_acctrace.h"
    147 #include "rf_etimer.h"
    148 #include "rf_general.h"
    149 #include "rf_debugMem.h"
    150 #include "rf_kintf.h"
    151 #include "rf_options.h"
    152 #include "rf_driver.h"
    153 #include "rf_parityscan.h"
    154 #include "rf_debugprint.h"
    155 #include "rf_threadstuff.h"
    156 #include "rf_configure.h"
    157 
/* Debug verbosity; db1_printf() output is emitted only when this is > 0. */
int     rf_kdebug_level = 0;

/*
 * db1_printf((fmt, args...)) -- debug printf, compiled in only under DEBUG.
 * The argument is a complete, parenthesized printf argument list.
 *
 * Both expansions are wrapped in do { } while (0) so that the macro
 * behaves as exactly one statement: it can be used as the unbraced body
 * of an if/else without capturing the caller's "else" (DEBUG case) or
 * leaving a stray empty statement that breaks the if/else
 * (non-DEBUG case).
 */
#ifdef DEBUG
#define db1_printf(a) do { if (rf_kdebug_level > 0) printf a; } while (0)
#else				/* DEBUG */
#define db1_printf(a) do { } while (0)
#endif				/* DEBUG */
    165 
    166 static RF_Raid_t **raidPtrs;	/* global raid device descriptors */
    167 
    168 RF_DECLARE_STATIC_MUTEX(rf_sparet_wait_mutex)
    169 
    170 static RF_SparetWait_t *rf_sparet_wait_queue;	/* requests to install a
    171 						 * spare table */
    172 static RF_SparetWait_t *rf_sparet_resp_queue;	/* responses from
    173 						 * installation process */
    174 
    175 /* prototypes */
    176 static void KernelWakeupFunc(struct buf * bp);
    177 static void InitBP(struct buf * bp, struct vnode *, unsigned rw_flag,
    178 		   dev_t dev, RF_SectorNum_t startSect,
    179 		   RF_SectorCount_t numSect, caddr_t buf,
    180 		   void (*cbFunc) (struct buf *), void *cbArg,
    181 		   int logBytesPerSector, struct proc * b_proc);
    182 static void raidinit __P((RF_Raid_t *));
    183 
    184 void raidattach __P((int));
    185 int raidsize __P((dev_t));
    186 int raidopen __P((dev_t, int, int, struct proc *));
    187 int raidclose __P((dev_t, int, int, struct proc *));
    188 int raidioctl __P((dev_t, u_long, caddr_t, int, struct proc *));
    189 int raidwrite __P((dev_t, struct uio *, int));
    190 int raidread __P((dev_t, struct uio *, int));
    191 void raidstrategy __P((struct buf *));
    192 int raiddump __P((dev_t, daddr_t, caddr_t, size_t));
    193 
    194 /*
    195  * Pilfered from ccd.c
    196  */
    197 
    198 struct raidbuf {
    199 	struct buf rf_buf;	/* new I/O buf.  MUST BE FIRST!!! */
    200 	struct buf *rf_obp;	/* ptr. to original I/O buf */
    201 	int     rf_flags;	/* misc. flags */
    202 	RF_DiskQueueData_t *req;/* the request that this was part of.. */
    203 };
    204 
    205 
    206 #define RAIDGETBUF(rs) pool_get(&(rs)->sc_cbufpool, PR_NOWAIT)
    207 #define	RAIDPUTBUF(rs, cbp) pool_put(&(rs)->sc_cbufpool, cbp)
    208 
    209 /* XXX Not sure if the following should be replacing the raidPtrs above,
    210    or if it should be used in conjunction with that...
    211 */
    212 
    213 struct raid_softc {
    214 	int     sc_flags;	/* flags */
    215 	int     sc_cflags;	/* configuration flags */
    216 	size_t  sc_size;        /* size of the raid device */
    217 	char    sc_xname[20];	/* XXX external name */
    218 	struct disk sc_dkdev;	/* generic disk device info */
    219 	struct pool sc_cbufpool;	/* component buffer pool */
    220 	struct buf_queue buf_queue;	/* used for the device queue */
    221 };
    222 /* sc_flags */
    223 #define RAIDF_INITED	0x01	/* unit has been initialized */
    224 #define RAIDF_WLABEL	0x02	/* label area is writable */
    225 #define RAIDF_LABELLING	0x04	/* unit is currently being labelled */
    226 #define RAIDF_WANTED	0x40	/* someone is waiting to obtain a lock */
    227 #define RAIDF_LOCKED	0x80	/* unit is locked */
    228 
    229 #define	raidunit(x)	DISKUNIT(x)
    230 int numraid = 0;
    231 
    232 /*
    233  * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
    234  * Be aware that large numbers can allow the driver to consume a lot of
    235  * kernel memory, especially on writes, and in degraded mode reads.
    236  *
    237  * For example: with a stripe width of 64 blocks (32k) and 5 disks,
    238  * a single 64K write will typically require 64K for the old data,
    239  * 64K for the old parity, and 64K for the new parity, for a total
    240  * of 192K (if the parity buffer is not re-used immediately).
    241  * Even if it is used immediately, that's still 128K, which when multiplied
    242  * by say 10 requests, is 1280K, *on top* of the 640K of incoming data.
    243  *
    244  * Now in degraded mode, for example, a 64K read on the above setup may
    245  * require data reconstruction, which will require *all* of the 4 remaining
    246  * disks to participate -- 4 * 32K/disk == 128K again.
    247  */
    248 
/*
 * Default number of simultaneous I/Os allowed per RAID device; may be
 * overridden by defining RAIDOUTSTANDING before this point.
 */
#if !defined(RAIDOUTSTANDING)
#define	RAIDOUTSTANDING	6
#endif

/* The raw-partition device for the unit that `dev' belongs to. */
#define	RAIDLABELDEV(dev)						\
	(MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))
    255 
    256 /* declared here, and made public, for the benefit of KVM stuff.. */
    257 struct raid_softc *raid_softc;
    258 
    259 static void raidgetdefaultlabel __P((RF_Raid_t *, struct raid_softc *,
    260 				     struct disklabel *));
    261 static void raidgetdisklabel __P((dev_t));
    262 static void raidmakedisklabel __P((struct raid_softc *));
    263 
    264 static int raidlock __P((struct raid_softc *));
    265 static void raidunlock __P((struct raid_softc *));
    266 
    267 static void rf_markalldirty __P((RF_Raid_t *));
    268 void rf_mountroot_hook __P((struct device *));
    269 
    270 struct device *raidrootdev;
    271 
    272 void rf_ReconThread __P((struct rf_recon_req *));
    273 /* XXX what I want is: */
    274 /*void rf_ReconThread __P((RF_Raid_t *raidPtr));  */
    275 void rf_RewriteParityThread __P((RF_Raid_t *raidPtr));
    276 void rf_CopybackThread __P((RF_Raid_t *raidPtr));
    277 void rf_ReconstructInPlaceThread __P((struct rf_recon_req *));
    278 void rf_buildroothack __P((void *));
    279 
    280 RF_AutoConfig_t *rf_find_raid_components __P((void));
    281 RF_ConfigSet_t *rf_create_auto_sets __P((RF_AutoConfig_t *));
    282 static int rf_does_it_fit __P((RF_ConfigSet_t *,RF_AutoConfig_t *));
    283 static int rf_reasonable_label __P((RF_ComponentLabel_t *));
    284 void rf_create_configuration __P((RF_AutoConfig_t *,RF_Config_t *,
    285 				  RF_Raid_t *));
    286 int rf_set_autoconfig __P((RF_Raid_t *, int));
    287 int rf_set_rootpartition __P((RF_Raid_t *, int));
    288 void rf_release_all_vps __P((RF_ConfigSet_t *));
    289 void rf_cleanup_config_set __P((RF_ConfigSet_t *));
    290 int rf_have_enough_components __P((RF_ConfigSet_t *));
    291 int rf_auto_config_set __P((RF_ConfigSet_t *, int *));
    292 
    293 static int raidautoconfig = 0; /* Debugging, mostly.  Set to 0 to not
    294 				  allow autoconfig to take place.
    295 			          Note that this is overridden by having
    296 			          RAID_AUTOCONFIG as an option in the
    297 			          kernel config file.  */
    298 
    299 void
    300 raidattach(num)
    301 	int     num;
    302 {
    303 	int raidID;
    304 	int i, rc;
    305 	RF_AutoConfig_t *ac_list; /* autoconfig list */
    306 	RF_ConfigSet_t *config_sets;
    307 
    308 #ifdef DEBUG
    309 	printf("raidattach: Asked for %d units\n", num);
    310 #endif
    311 
    312 	if (num <= 0) {
    313 #ifdef DIAGNOSTIC
    314 		panic("raidattach: count <= 0");
    315 #endif
    316 		return;
    317 	}
    318 	/* This is where all the initialization stuff gets done. */
    319 
    320 	numraid = num;
    321 
    322 	/* Make some space for requested number of units... */
    323 
    324 	RF_Calloc(raidPtrs, num, sizeof(RF_Raid_t *), (RF_Raid_t **));
    325 	if (raidPtrs == NULL) {
    326 		panic("raidPtrs is NULL!!\n");
    327 	}
    328 
    329 	rc = rf_mutex_init(&rf_sparet_wait_mutex);
    330 	if (rc) {
    331 		RF_PANIC();
    332 	}
    333 
    334 	rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
    335 
    336 	for (i = 0; i < num; i++)
    337 		raidPtrs[i] = NULL;
    338 	rc = rf_BootRaidframe();
    339 	if (rc == 0)
    340 		printf("Kernelized RAIDframe activated\n");
    341 	else
    342 		panic("Serious error booting RAID!!\n");
    343 
    344 	/* put together some datastructures like the CCD device does.. This
    345 	 * lets us lock the device and what-not when it gets opened. */
    346 
    347 	raid_softc = (struct raid_softc *)
    348 		malloc(num * sizeof(struct raid_softc),
    349 		       M_RAIDFRAME, M_NOWAIT);
    350 	if (raid_softc == NULL) {
    351 		printf("WARNING: no memory for RAIDframe driver\n");
    352 		return;
    353 	}
    354 
    355 	bzero(raid_softc, num * sizeof(struct raid_softc));
    356 
    357 	raidrootdev = (struct device *)malloc(num * sizeof(struct device),
    358 					      M_RAIDFRAME, M_NOWAIT);
    359 	if (raidrootdev == NULL) {
    360 		panic("No memory for RAIDframe driver!!?!?!\n");
    361 	}
    362 
    363 	for (raidID = 0; raidID < num; raidID++) {
    364 		BUFQ_INIT(&raid_softc[raidID].buf_queue);
    365 
    366 		raidrootdev[raidID].dv_class  = DV_DISK;
    367 		raidrootdev[raidID].dv_cfdata = NULL;
    368 		raidrootdev[raidID].dv_unit   = raidID;
    369 		raidrootdev[raidID].dv_parent = NULL;
    370 		raidrootdev[raidID].dv_flags  = 0;
    371 		sprintf(raidrootdev[raidID].dv_xname,"raid%d",raidID);
    372 
    373 		RF_Calloc(raidPtrs[raidID], 1, sizeof(RF_Raid_t),
    374 			  (RF_Raid_t *));
    375 		if (raidPtrs[raidID] == NULL) {
    376 			printf("WARNING: raidPtrs[%d] is NULL\n", raidID);
    377 			numraid = raidID;
    378 			return;
    379 		}
    380 	}
    381 
    382 #if RAID_AUTOCONFIG
    383 	raidautoconfig = 1;
    384 #endif
    385 
    386 if (raidautoconfig) {
    387 	/* 1. locate all RAID components on the system */
    388 
    389 #if DEBUG
    390 	printf("Searching for raid components...\n");
    391 #endif
    392 	ac_list = rf_find_raid_components();
    393 
    394 	/* 2. sort them into their respective sets */
    395 
    396 	config_sets = rf_create_auto_sets(ac_list);
    397 
    398 	/* 3. evaluate each set and configure the valid ones
    399 	   This gets done in rf_buildroothack() */
    400 
    401 	/* schedule the creation of the thread to do the
    402 	   "/ on RAID" stuff */
    403 
    404 	kthread_create(rf_buildroothack,config_sets);
    405 
    406 #if 0
    407 	mountroothook_establish(rf_mountroot_hook, &raidrootdev[0]);
    408 #endif
    409 }
    410 
    411 }
    412 
    413 void
    414 rf_buildroothack(arg)
    415 	void *arg;
    416 {
    417 	RF_ConfigSet_t *config_sets = arg;
    418 	RF_ConfigSet_t *cset;
    419 	RF_ConfigSet_t *next_cset;
    420 	int retcode;
    421 	int raidID;
    422 	int rootID;
    423 	int num_root;
    424 
    425 	num_root = 0;
    426 	cset = config_sets;
    427 	while(cset != NULL ) {
    428 		next_cset = cset->next;
    429 		if (rf_have_enough_components(cset) &&
    430 		    cset->ac->clabel->autoconfigure==1) {
    431 			retcode = rf_auto_config_set(cset,&raidID);
    432 			if (!retcode) {
    433 				if (cset->rootable) {
    434 					rootID = raidID;
    435 					num_root++;
    436 				}
    437 			} else {
    438 				/* The autoconfig didn't work :( */
    439 #if DEBUG
    440 				printf("Autoconfig failed with code %d for raid%d\n", retcode, raidID);
    441 #endif
    442 				rf_release_all_vps(cset);
    443 			}
    444 		} else {
    445 			/* we're not autoconfiguring this set...
    446 			   release the associated resources */
    447 			rf_release_all_vps(cset);
    448 		}
    449 		/* cleanup */
    450 		rf_cleanup_config_set(cset);
    451 		cset = next_cset;
    452 	}
    453 	if (boothowto & RB_ASKNAME) {
    454 		/* We don't auto-config... */
    455 	} else {
    456 		/* They didn't ask, and we found something bootable... */
    457 
    458 		if (num_root == 1) {
    459 			booted_device = &raidrootdev[rootID];
    460 		} else if (num_root > 1) {
    461 			/* we can't guess.. require the user to answer... */
    462 			boothowto |= RB_ASKNAME;
    463 		}
    464 	}
    465 }
    466 
    467 
    468 int
    469 raidsize(dev)
    470 	dev_t   dev;
    471 {
    472 	struct raid_softc *rs;
    473 	struct disklabel *lp;
    474 	int     part, unit, omask, size;
    475 
    476 	unit = raidunit(dev);
    477 	if (unit >= numraid)
    478 		return (-1);
    479 	rs = &raid_softc[unit];
    480 
    481 	if ((rs->sc_flags & RAIDF_INITED) == 0)
    482 		return (-1);
    483 
    484 	part = DISKPART(dev);
    485 	omask = rs->sc_dkdev.dk_openmask & (1 << part);
    486 	lp = rs->sc_dkdev.dk_label;
    487 
    488 	if (omask == 0 && raidopen(dev, 0, S_IFBLK, curproc))
    489 		return (-1);
    490 
    491 	if (lp->d_partitions[part].p_fstype != FS_SWAP)
    492 		size = -1;
    493 	else
    494 		size = lp->d_partitions[part].p_size *
    495 		    (lp->d_secsize / DEV_BSIZE);
    496 
    497 	if (omask == 0 && raidclose(dev, 0, S_IFBLK, curproc))
    498 		return (-1);
    499 
    500 	return (size);
    501 
    502 }
    503 
/*
 * raiddump: crash-dump entry point.  Not implemented; every call fails
 * with ENXIO and none of the arguments are examined.
 */
int
raiddump(dev, blkno, va, size)
	dev_t   dev;
	daddr_t blkno;
	caddr_t va;
	size_t  size;
{
	int     status;

	/* Not implemented. */
	status = ENXIO;
	return (status);
}
    514 /* ARGSUSED */
    515 int
    516 raidopen(dev, flags, fmt, p)
    517 	dev_t   dev;
    518 	int     flags, fmt;
    519 	struct proc *p;
    520 {
    521 	int     unit = raidunit(dev);
    522 	struct raid_softc *rs;
    523 	struct disklabel *lp;
    524 	int     part, pmask;
    525 	int     error = 0;
    526 
    527 	if (unit >= numraid)
    528 		return (ENXIO);
    529 	rs = &raid_softc[unit];
    530 
    531 	if ((error = raidlock(rs)) != 0)
    532 		return (error);
    533 	lp = rs->sc_dkdev.dk_label;
    534 
    535 	part = DISKPART(dev);
    536 	pmask = (1 << part);
    537 
    538 	db1_printf(("Opening raid device number: %d partition: %d\n",
    539 		unit, part));
    540 
    541 
    542 	if ((rs->sc_flags & RAIDF_INITED) &&
    543 	    (rs->sc_dkdev.dk_openmask == 0))
    544 		raidgetdisklabel(dev);
    545 
    546 	/* make sure that this partition exists */
    547 
    548 	if (part != RAW_PART) {
    549 		db1_printf(("Not a raw partition..\n"));
    550 		if (((rs->sc_flags & RAIDF_INITED) == 0) ||
    551 		    ((part >= lp->d_npartitions) ||
    552 			(lp->d_partitions[part].p_fstype == FS_UNUSED))) {
    553 			error = ENXIO;
    554 			raidunlock(rs);
    555 			db1_printf(("Bailing out...\n"));
    556 			return (error);
    557 		}
    558 	}
    559 	/* Prevent this unit from being unconfigured while open. */
    560 	switch (fmt) {
    561 	case S_IFCHR:
    562 		rs->sc_dkdev.dk_copenmask |= pmask;
    563 		break;
    564 
    565 	case S_IFBLK:
    566 		rs->sc_dkdev.dk_bopenmask |= pmask;
    567 		break;
    568 	}
    569 
    570 	if ((rs->sc_dkdev.dk_openmask == 0) &&
    571 	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
    572 		/* First one... mark things as dirty... Note that we *MUST*
    573 		 have done a configure before this.  I DO NOT WANT TO BE
    574 		 SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
    575 		 THAT THEY BELONG TOGETHER!!!!! */
    576 		/* XXX should check to see if we're only open for reading
    577 		   here... If so, we needn't do this, but then need some
    578 		   other way of keeping track of what's happened.. */
    579 
    580 		rf_markalldirty( raidPtrs[unit] );
    581 	}
    582 
    583 
    584 	rs->sc_dkdev.dk_openmask =
    585 	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
    586 
    587 	raidunlock(rs);
    588 
    589 	return (error);
    590 
    591 
    592 }
    593 /* ARGSUSED */
    594 int
    595 raidclose(dev, flags, fmt, p)
    596 	dev_t   dev;
    597 	int     flags, fmt;
    598 	struct proc *p;
    599 {
    600 	int     unit = raidunit(dev);
    601 	struct raid_softc *rs;
    602 	int     error = 0;
    603 	int     part;
    604 
    605 	if (unit >= numraid)
    606 		return (ENXIO);
    607 	rs = &raid_softc[unit];
    608 
    609 	if ((error = raidlock(rs)) != 0)
    610 		return (error);
    611 
    612 	part = DISKPART(dev);
    613 
    614 	/* ...that much closer to allowing unconfiguration... */
    615 	switch (fmt) {
    616 	case S_IFCHR:
    617 		rs->sc_dkdev.dk_copenmask &= ~(1 << part);
    618 		break;
    619 
    620 	case S_IFBLK:
    621 		rs->sc_dkdev.dk_bopenmask &= ~(1 << part);
    622 		break;
    623 	}
    624 	rs->sc_dkdev.dk_openmask =
    625 	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
    626 
    627 	if ((rs->sc_dkdev.dk_openmask == 0) &&
    628 	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
    629 		/* Last one... device is not unconfigured yet.
    630 		   Device shutdown has taken care of setting the
    631 		   clean bits if RAIDF_INITED is not set
    632 		   mark things as clean... */
    633 #if 0
    634 		printf("Last one on raid%d.  Updating status.\n",unit);
    635 #endif
    636 		rf_final_update_component_labels( raidPtrs[unit] );
    637 	}
    638 
    639 	raidunlock(rs);
    640 	return (0);
    641 
    642 }
    643 
    644 void
    645 raidstrategy(bp)
    646 	struct buf *bp;
    647 {
    648 	int s;
    649 
    650 	unsigned int raidID = raidunit(bp->b_dev);
    651 	RF_Raid_t *raidPtr;
    652 	struct raid_softc *rs = &raid_softc[raidID];
    653 	struct disklabel *lp;
    654 	int     wlabel;
    655 
    656 	if ((rs->sc_flags & RAIDF_INITED) ==0) {
    657 		bp->b_error = ENXIO;
    658 		bp->b_flags = B_ERROR;
    659 		bp->b_resid = bp->b_bcount;
    660 		biodone(bp);
    661 		return;
    662 	}
    663 	if (raidID >= numraid || !raidPtrs[raidID]) {
    664 		bp->b_error = ENODEV;
    665 		bp->b_flags |= B_ERROR;
    666 		bp->b_resid = bp->b_bcount;
    667 		biodone(bp);
    668 		return;
    669 	}
    670 	raidPtr = raidPtrs[raidID];
    671 	if (!raidPtr->valid) {
    672 		bp->b_error = ENODEV;
    673 		bp->b_flags |= B_ERROR;
    674 		bp->b_resid = bp->b_bcount;
    675 		biodone(bp);
    676 		return;
    677 	}
    678 	if (bp->b_bcount == 0) {
    679 		db1_printf(("b_bcount is zero..\n"));
    680 		biodone(bp);
    681 		return;
    682 	}
    683 	lp = rs->sc_dkdev.dk_label;
    684 
    685 	/*
    686 	 * Do bounds checking and adjust transfer.  If there's an
    687 	 * error, the bounds check will flag that for us.
    688 	 */
    689 
    690 	wlabel = rs->sc_flags & (RAIDF_WLABEL | RAIDF_LABELLING);
    691 	if (DISKPART(bp->b_dev) != RAW_PART)
    692 		if (bounds_check_with_label(bp, lp, wlabel) <= 0) {
    693 			db1_printf(("Bounds check failed!!:%d %d\n",
    694 				(int) bp->b_blkno, (int) wlabel));
    695 			biodone(bp);
    696 			return;
    697 		}
    698 	s = splbio();
    699 
    700 	bp->b_resid = 0;
    701 
    702 	/* stuff it onto our queue */
    703 	BUFQ_INSERT_TAIL(&rs->buf_queue, bp);
    704 
    705 	raidstart(raidPtrs[raidID]);
    706 
    707 	splx(s);
    708 }
    709 /* ARGSUSED */
    710 int
    711 raidread(dev, uio, flags)
    712 	dev_t   dev;
    713 	struct uio *uio;
    714 	int     flags;
    715 {
    716 	int     unit = raidunit(dev);
    717 	struct raid_softc *rs;
    718 	int     part;
    719 
    720 	if (unit >= numraid)
    721 		return (ENXIO);
    722 	rs = &raid_softc[unit];
    723 
    724 	if ((rs->sc_flags & RAIDF_INITED) == 0)
    725 		return (ENXIO);
    726 	part = DISKPART(dev);
    727 
    728 	db1_printf(("raidread: unit: %d partition: %d\n", unit, part));
    729 
    730 	return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));
    731 
    732 }
    733 /* ARGSUSED */
    734 int
    735 raidwrite(dev, uio, flags)
    736 	dev_t   dev;
    737 	struct uio *uio;
    738 	int     flags;
    739 {
    740 	int     unit = raidunit(dev);
    741 	struct raid_softc *rs;
    742 
    743 	if (unit >= numraid)
    744 		return (ENXIO);
    745 	rs = &raid_softc[unit];
    746 
    747 	if ((rs->sc_flags & RAIDF_INITED) == 0)
    748 		return (ENXIO);
    749 	db1_printf(("raidwrite\n"));
    750 	return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));
    751 
    752 }
    753 
    754 int
    755 raidioctl(dev, cmd, data, flag, p)
    756 	dev_t   dev;
    757 	u_long  cmd;
    758 	caddr_t data;
    759 	int     flag;
    760 	struct proc *p;
    761 {
    762 	int     unit = raidunit(dev);
    763 	int     error = 0;
    764 	int     part, pmask;
    765 	struct raid_softc *rs;
    766 	RF_Config_t *k_cfg, *u_cfg;
    767 	RF_Raid_t *raidPtr;
    768 	RF_RaidDisk_t *diskPtr;
    769 	RF_AccTotals_t *totals;
    770 	RF_DeviceConfig_t *d_cfg, **ucfgp;
    771 	u_char *specific_buf;
    772 	int retcode = 0;
    773 	int row;
    774 	int column;
    775 	struct rf_recon_req *rrcopy, *rr;
    776 	RF_ComponentLabel_t *clabel;
    777 	RF_ComponentLabel_t ci_label;
    778 	RF_ComponentLabel_t **clabel_ptr;
    779 	RF_SingleComponent_t *sparePtr,*componentPtr;
    780 	RF_SingleComponent_t hot_spare;
    781 	RF_SingleComponent_t component;
    782 	RF_ProgressInfo_t progressInfo, **progressInfoPtr;
    783 	int i, j, d;
    784 
    785 	if (unit >= numraid)
    786 		return (ENXIO);
    787 	rs = &raid_softc[unit];
    788 	raidPtr = raidPtrs[unit];
    789 
    790 	db1_printf(("raidioctl: %d %d %d %d\n", (int) dev,
    791 		(int) DISKPART(dev), (int) unit, (int) cmd));
    792 
    793 	/* Must be open for writes for these commands... */
    794 	switch (cmd) {
    795 	case DIOCSDINFO:
    796 	case DIOCWDINFO:
    797 	case DIOCWLABEL:
    798 		if ((flag & FWRITE) == 0)
    799 			return (EBADF);
    800 	}
    801 
    802 	/* Must be initialized for these... */
    803 	switch (cmd) {
    804 	case DIOCGDINFO:
    805 	case DIOCSDINFO:
    806 	case DIOCWDINFO:
    807 	case DIOCGPART:
    808 	case DIOCWLABEL:
    809 	case DIOCGDEFLABEL:
    810 	case RAIDFRAME_SHUTDOWN:
    811 	case RAIDFRAME_REWRITEPARITY:
    812 	case RAIDFRAME_GET_INFO:
    813 	case RAIDFRAME_RESET_ACCTOTALS:
    814 	case RAIDFRAME_GET_ACCTOTALS:
    815 	case RAIDFRAME_KEEP_ACCTOTALS:
    816 	case RAIDFRAME_GET_SIZE:
    817 	case RAIDFRAME_FAIL_DISK:
    818 	case RAIDFRAME_COPYBACK:
    819 	case RAIDFRAME_CHECK_RECON_STATUS:
    820 	case RAIDFRAME_CHECK_RECON_STATUS_EXT:
    821 	case RAIDFRAME_GET_COMPONENT_LABEL:
    822 	case RAIDFRAME_SET_COMPONENT_LABEL:
    823 	case RAIDFRAME_ADD_HOT_SPARE:
    824 	case RAIDFRAME_REMOVE_HOT_SPARE:
    825 	case RAIDFRAME_INIT_LABELS:
    826 	case RAIDFRAME_REBUILD_IN_PLACE:
    827 	case RAIDFRAME_CHECK_PARITY:
    828 	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
    829 	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
    830 	case RAIDFRAME_CHECK_COPYBACK_STATUS:
    831 	case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
    832 	case RAIDFRAME_SET_AUTOCONFIG:
    833 	case RAIDFRAME_SET_ROOT:
    834 	case RAIDFRAME_DELETE_COMPONENT:
    835 	case RAIDFRAME_INCORPORATE_HOT_SPARE:
    836 		if ((rs->sc_flags & RAIDF_INITED) == 0)
    837 			return (ENXIO);
    838 	}
    839 
    840 	switch (cmd) {
    841 
    842 		/* configure the system */
    843 	case RAIDFRAME_CONFIGURE:
    844 
    845 		if (raidPtr->valid) {
    846 			/* There is a valid RAID set running on this unit! */
    847 			printf("raid%d: Device already configured!\n",unit);
    848 			return(EINVAL);
    849 		}
    850 
    851 		/* copy-in the configuration information */
    852 		/* data points to a pointer to the configuration structure */
    853 
    854 		u_cfg = *((RF_Config_t **) data);
    855 		RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *));
    856 		if (k_cfg == NULL) {
    857 			return (ENOMEM);
    858 		}
    859 		retcode = copyin((caddr_t) u_cfg, (caddr_t) k_cfg,
    860 		    sizeof(RF_Config_t));
    861 		if (retcode) {
    862 			RF_Free(k_cfg, sizeof(RF_Config_t));
    863 			db1_printf(("rf_ioctl: retcode=%d copyin.1\n",
    864 				retcode));
    865 			return (retcode);
    866 		}
    867 		/* allocate a buffer for the layout-specific data, and copy it
    868 		 * in */
    869 		if (k_cfg->layoutSpecificSize) {
    870 			if (k_cfg->layoutSpecificSize > 10000) {
    871 				/* sanity check */
    872 				RF_Free(k_cfg, sizeof(RF_Config_t));
    873 				return (EINVAL);
    874 			}
    875 			RF_Malloc(specific_buf, k_cfg->layoutSpecificSize,
    876 			    (u_char *));
    877 			if (specific_buf == NULL) {
    878 				RF_Free(k_cfg, sizeof(RF_Config_t));
    879 				return (ENOMEM);
    880 			}
    881 			retcode = copyin(k_cfg->layoutSpecific,
    882 			    (caddr_t) specific_buf,
    883 			    k_cfg->layoutSpecificSize);
    884 			if (retcode) {
    885 				RF_Free(k_cfg, sizeof(RF_Config_t));
    886 				RF_Free(specific_buf,
    887 					k_cfg->layoutSpecificSize);
    888 				db1_printf(("rf_ioctl: retcode=%d copyin.2\n",
    889 					retcode));
    890 				return (retcode);
    891 			}
    892 		} else
    893 			specific_buf = NULL;
    894 		k_cfg->layoutSpecific = specific_buf;
    895 
    896 		/* should do some kind of sanity check on the configuration.
    897 		 * Store the sum of all the bytes in the last byte? */
    898 
    899 		/* configure the system */
    900 
    901 		/*
    902 		 * Clear the entire RAID descriptor, just to make sure
    903 		 *  there is no stale data left in the case of a
    904 		 *  reconfiguration
    905 		 */
    906 		bzero((char *) raidPtr, sizeof(RF_Raid_t));
    907 		raidPtr->raidid = unit;
    908 
    909 		retcode = rf_Configure(raidPtr, k_cfg, NULL);
    910 
    911 		if (retcode == 0) {
    912 
    913 			/* allow this many simultaneous IO's to
    914 			   this RAID device */
    915 			raidPtr->openings = RAIDOUTSTANDING;
    916 
    917 			raidinit(raidPtr);
    918 			rf_markalldirty(raidPtr);
    919 		}
    920 		/* free the buffers.  No return code here. */
    921 		if (k_cfg->layoutSpecificSize) {
    922 			RF_Free(specific_buf, k_cfg->layoutSpecificSize);
    923 		}
    924 		RF_Free(k_cfg, sizeof(RF_Config_t));
    925 
    926 		return (retcode);
    927 
    928 		/* shutdown the system */
    929 	case RAIDFRAME_SHUTDOWN:
    930 
    931 		if ((error = raidlock(rs)) != 0)
    932 			return (error);
    933 
    934 		/*
    935 		 * If somebody has a partition mounted, we shouldn't
    936 		 * shutdown.
    937 		 */
    938 
    939 		part = DISKPART(dev);
    940 		pmask = (1 << part);
    941 		if ((rs->sc_dkdev.dk_openmask & ~pmask) ||
    942 		    ((rs->sc_dkdev.dk_bopenmask & pmask) &&
    943 			(rs->sc_dkdev.dk_copenmask & pmask))) {
    944 			raidunlock(rs);
    945 			return (EBUSY);
    946 		}
    947 
    948 		retcode = rf_Shutdown(raidPtr);
    949 
    950 		pool_destroy(&rs->sc_cbufpool);
    951 
    952 		/* It's no longer initialized... */
    953 		rs->sc_flags &= ~RAIDF_INITED;
    954 
    955 		/* Detach the disk. */
    956 		disk_detach(&rs->sc_dkdev);
    957 
    958 		raidunlock(rs);
    959 
    960 		return (retcode);
    961 	case RAIDFRAME_GET_COMPONENT_LABEL:
    962 		clabel_ptr = (RF_ComponentLabel_t **) data;
    963 		/* need to read the component label for the disk indicated
    964 		   by row,column in clabel */
    965 
    966 		/* For practice, let's get it directly from disk, rather
    967 		   than from the in-core copy */
    968 		RF_Malloc( clabel, sizeof( RF_ComponentLabel_t ),
    969 			   (RF_ComponentLabel_t *));
    970 		if (clabel == NULL)
    971 			return (ENOMEM);
    972 
    973 		bzero((char *) clabel, sizeof(RF_ComponentLabel_t));
    974 
    975 		retcode = copyin( *clabel_ptr, clabel,
    976 				  sizeof(RF_ComponentLabel_t));
    977 
    978 		if (retcode) {
    979 			RF_Free( clabel, sizeof(RF_ComponentLabel_t));
    980 			return(retcode);
    981 		}
    982 
    983 		row = clabel->row;
    984 		column = clabel->column;
    985 
    986 		if ((row < 0) || (row >= raidPtr->numRow) ||
    987 		    (column < 0) || (column >= raidPtr->numCol)) {
    988 			RF_Free( clabel, sizeof(RF_ComponentLabel_t));
    989 			return(EINVAL);
    990 		}
    991 
    992 		raidread_component_label(raidPtr->Disks[row][column].dev,
    993 				raidPtr->raid_cinfo[row][column].ci_vp,
    994 				clabel );
    995 
    996 		retcode = copyout((caddr_t) clabel,
    997 				  (caddr_t) *clabel_ptr,
    998 				  sizeof(RF_ComponentLabel_t));
    999 		RF_Free( clabel, sizeof(RF_ComponentLabel_t));
   1000 		return (retcode);
   1001 
   1002 	case RAIDFRAME_SET_COMPONENT_LABEL:
   1003 		clabel = (RF_ComponentLabel_t *) data;
   1004 
   1005 		/* XXX check the label for valid stuff... */
   1006 		/* Note that some things *should not* get modified --
   1007 		   the user should be re-initing the labels instead of
   1008 		   trying to patch things.
   1009 		   */
   1010 
   1011 		printf("Got component label:\n");
   1012 		printf("Version: %d\n",clabel->version);
   1013 		printf("Serial Number: %d\n",clabel->serial_number);
   1014 		printf("Mod counter: %d\n",clabel->mod_counter);
   1015 		printf("Row: %d\n", clabel->row);
   1016 		printf("Column: %d\n", clabel->column);
   1017 		printf("Num Rows: %d\n", clabel->num_rows);
   1018 		printf("Num Columns: %d\n", clabel->num_columns);
   1019 		printf("Clean: %d\n", clabel->clean);
   1020 		printf("Status: %d\n", clabel->status);
   1021 
   1022 		row = clabel->row;
   1023 		column = clabel->column;
   1024 
   1025 		if ((row < 0) || (row >= raidPtr->numRow) ||
   1026 		    (column < 0) || (column >= raidPtr->numCol)) {
   1027 			return(EINVAL);
   1028 		}
   1029 
   1030 		/* XXX this isn't allowed to do anything for now :-) */
   1031 
   1032 		/* XXX and before it is, we need to fill in the rest
   1033 		   of the fields!?!?!?! */
   1034 #if 0
   1035 		raidwrite_component_label(
   1036                             raidPtr->Disks[row][column].dev,
   1037 			    raidPtr->raid_cinfo[row][column].ci_vp,
   1038 			    clabel );
   1039 #endif
   1040 		return (0);
   1041 
   1042 	case RAIDFRAME_INIT_LABELS:
   1043 		clabel = (RF_ComponentLabel_t *) data;
   1044 		/*
   1045 		   we only want the serial number from
   1046 		   the above.  We get all the rest of the information
   1047 		   from the config that was used to create this RAID
   1048 		   set.
   1049 		   */
   1050 
   1051 		raidPtr->serial_number = clabel->serial_number;
   1052 
   1053 		raid_init_component_label(raidPtr, &ci_label);
   1054 		ci_label.serial_number = clabel->serial_number;
   1055 
   1056 		for(row=0;row<raidPtr->numRow;row++) {
   1057 			ci_label.row = row;
   1058 			for(column=0;column<raidPtr->numCol;column++) {
   1059 				diskPtr = &raidPtr->Disks[row][column];
   1060 				ci_label.partitionSize = diskPtr->partitionSize;
   1061 				ci_label.column = column;
   1062 				raidwrite_component_label(
   1063 				  raidPtr->Disks[row][column].dev,
   1064 				  raidPtr->raid_cinfo[row][column].ci_vp,
   1065 				  &ci_label );
   1066 			}
   1067 		}
   1068 
   1069 		return (retcode);
   1070 	case RAIDFRAME_SET_AUTOCONFIG:
   1071 		d = rf_set_autoconfig(raidPtr, *(int *) data);
   1072 		printf("New autoconfig value is: %d\n", d);
   1073 		*(int *) data = d;
   1074 		return (retcode);
   1075 
   1076 	case RAIDFRAME_SET_ROOT:
   1077 		d = rf_set_rootpartition(raidPtr, *(int *) data);
   1078 		printf("New rootpartition value is: %d\n", d);
   1079 		*(int *) data = d;
   1080 		return (retcode);
   1081 
   1082 		/* initialize all parity */
   1083 	case RAIDFRAME_REWRITEPARITY:
   1084 
   1085 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1086 			/* Parity for RAID 0 is trivially correct */
   1087 			raidPtr->parity_good = RF_RAID_CLEAN;
   1088 			return(0);
   1089 		}
   1090 
   1091 		if (raidPtr->parity_rewrite_in_progress == 1) {
   1092 			/* Re-write is already in progress! */
   1093 			return(EINVAL);
   1094 		}
   1095 
   1096 		retcode = RF_CREATE_THREAD(raidPtr->parity_rewrite_thread,
   1097 					   rf_RewriteParityThread,
   1098 					   raidPtr,"raid_parity");
   1099 		return (retcode);
   1100 
   1101 
   1102 	case RAIDFRAME_ADD_HOT_SPARE:
   1103 		sparePtr = (RF_SingleComponent_t *) data;
   1104 		memcpy( &hot_spare, sparePtr, sizeof(RF_SingleComponent_t));
   1105 		retcode = rf_add_hot_spare(raidPtr, &hot_spare);
   1106 		return(retcode);
   1107 
   1108 	case RAIDFRAME_REMOVE_HOT_SPARE:
   1109 		return(retcode);
   1110 
   1111 	case RAIDFRAME_DELETE_COMPONENT:
   1112 		componentPtr = (RF_SingleComponent_t *)data;
   1113 		memcpy( &component, componentPtr,
   1114 			sizeof(RF_SingleComponent_t));
   1115 		retcode = rf_delete_component(raidPtr, &component);
   1116 		return(retcode);
   1117 
   1118 	case RAIDFRAME_INCORPORATE_HOT_SPARE:
   1119 		componentPtr = (RF_SingleComponent_t *)data;
   1120 		memcpy( &component, componentPtr,
   1121 			sizeof(RF_SingleComponent_t));
   1122 		retcode = rf_incorporate_hot_spare(raidPtr, &component);
   1123 		return(retcode);
   1124 
   1125 	case RAIDFRAME_REBUILD_IN_PLACE:
   1126 
   1127 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1128 			/* Can't do this on a RAID 0!! */
   1129 			return(EINVAL);
   1130 		}
   1131 
   1132 		if (raidPtr->recon_in_progress == 1) {
   1133 			/* a reconstruct is already in progress! */
   1134 			return(EINVAL);
   1135 		}
   1136 
   1137 		componentPtr = (RF_SingleComponent_t *) data;
   1138 		memcpy( &component, componentPtr,
   1139 			sizeof(RF_SingleComponent_t));
   1140 		row = component.row;
   1141 		column = component.column;
   1142 		printf("Rebuild: %d %d\n",row, column);
   1143 		if ((row < 0) || (row >= raidPtr->numRow) ||
   1144 		    (column < 0) || (column >= raidPtr->numCol)) {
   1145 			return(EINVAL);
   1146 		}
   1147 
   1148 		RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
   1149 		if (rrcopy == NULL)
   1150 			return(ENOMEM);
   1151 
   1152 		rrcopy->raidPtr = (void *) raidPtr;
   1153 		rrcopy->row = row;
   1154 		rrcopy->col = column;
   1155 
   1156 		retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
   1157 					   rf_ReconstructInPlaceThread,
   1158 					   rrcopy,"raid_reconip");
   1159 		return(retcode);
   1160 
   1161 	case RAIDFRAME_GET_INFO:
   1162 		if (!raidPtr->valid)
   1163 			return (ENODEV);
   1164 		ucfgp = (RF_DeviceConfig_t **) data;
   1165 		RF_Malloc(d_cfg, sizeof(RF_DeviceConfig_t),
   1166 			  (RF_DeviceConfig_t *));
   1167 		if (d_cfg == NULL)
   1168 			return (ENOMEM);
   1169 		bzero((char *) d_cfg, sizeof(RF_DeviceConfig_t));
   1170 		d_cfg->rows = raidPtr->numRow;
   1171 		d_cfg->cols = raidPtr->numCol;
   1172 		d_cfg->ndevs = raidPtr->numRow * raidPtr->numCol;
   1173 		if (d_cfg->ndevs >= RF_MAX_DISKS) {
   1174 			RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
   1175 			return (ENOMEM);
   1176 		}
   1177 		d_cfg->nspares = raidPtr->numSpare;
   1178 		if (d_cfg->nspares >= RF_MAX_DISKS) {
   1179 			RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
   1180 			return (ENOMEM);
   1181 		}
   1182 		d_cfg->maxqdepth = raidPtr->maxQueueDepth;
   1183 		d = 0;
   1184 		for (i = 0; i < d_cfg->rows; i++) {
   1185 			for (j = 0; j < d_cfg->cols; j++) {
   1186 				d_cfg->devs[d] = raidPtr->Disks[i][j];
   1187 				d++;
   1188 			}
   1189 		}
   1190 		for (j = d_cfg->cols, i = 0; i < d_cfg->nspares; i++, j++) {
   1191 			d_cfg->spares[i] = raidPtr->Disks[0][j];
   1192 		}
   1193 		retcode = copyout((caddr_t) d_cfg, (caddr_t) * ucfgp,
   1194 				  sizeof(RF_DeviceConfig_t));
   1195 		RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
   1196 
   1197 		return (retcode);
   1198 
   1199 	case RAIDFRAME_CHECK_PARITY:
   1200 		*(int *) data = raidPtr->parity_good;
   1201 		return (0);
   1202 
   1203 	case RAIDFRAME_RESET_ACCTOTALS:
   1204 		bzero(&raidPtr->acc_totals, sizeof(raidPtr->acc_totals));
   1205 		return (0);
   1206 
   1207 	case RAIDFRAME_GET_ACCTOTALS:
   1208 		totals = (RF_AccTotals_t *) data;
   1209 		*totals = raidPtr->acc_totals;
   1210 		return (0);
   1211 
   1212 	case RAIDFRAME_KEEP_ACCTOTALS:
   1213 		raidPtr->keep_acc_totals = *(int *)data;
   1214 		return (0);
   1215 
   1216 	case RAIDFRAME_GET_SIZE:
   1217 		*(int *) data = raidPtr->totalSectors;
   1218 		return (0);
   1219 
   1220 		/* fail a disk & optionally start reconstruction */
   1221 	case RAIDFRAME_FAIL_DISK:
   1222 
   1223 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1224 			/* Can't do this on a RAID 0!! */
   1225 			return(EINVAL);
   1226 		}
   1227 
   1228 		rr = (struct rf_recon_req *) data;
   1229 
   1230 		if (rr->row < 0 || rr->row >= raidPtr->numRow
   1231 		    || rr->col < 0 || rr->col >= raidPtr->numCol)
   1232 			return (EINVAL);
   1233 
   1234 		printf("raid%d: Failing the disk: row: %d col: %d\n",
   1235 		       unit, rr->row, rr->col);
   1236 
   1237 		/* make a copy of the recon request so that we don't rely on
   1238 		 * the user's buffer */
   1239 		RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
   1240 		if (rrcopy == NULL)
   1241 			return(ENOMEM);
   1242 		bcopy(rr, rrcopy, sizeof(*rr));
   1243 		rrcopy->raidPtr = (void *) raidPtr;
   1244 
   1245 		retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
   1246 					   rf_ReconThread,
   1247 					   rrcopy,"raid_recon");
   1248 		return (0);
   1249 
   1250 		/* invoke a copyback operation after recon on whatever disk
   1251 		 * needs it, if any */
   1252 	case RAIDFRAME_COPYBACK:
   1253 
   1254 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1255 			/* This makes no sense on a RAID 0!! */
   1256 			return(EINVAL);
   1257 		}
   1258 
   1259 		if (raidPtr->copyback_in_progress == 1) {
   1260 			/* Copyback is already in progress! */
   1261 			return(EINVAL);
   1262 		}
   1263 
   1264 		retcode = RF_CREATE_THREAD(raidPtr->copyback_thread,
   1265 					   rf_CopybackThread,
   1266 					   raidPtr,"raid_copyback");
   1267 		return (retcode);
   1268 
   1269 		/* return the percentage completion of reconstruction */
   1270 	case RAIDFRAME_CHECK_RECON_STATUS:
   1271 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1272 			/* This makes no sense on a RAID 0, so tell the
   1273 			   user it's done. */
   1274 			*(int *) data = 100;
   1275 			return(0);
   1276 		}
   1277 		row = 0; /* XXX we only consider a single row... */
   1278 		if (raidPtr->status[row] != rf_rs_reconstructing)
   1279 			*(int *) data = 100;
   1280 		else
   1281 			*(int *) data = raidPtr->reconControl[row]->percentComplete;
   1282 		return (0);
   1283 	case RAIDFRAME_CHECK_RECON_STATUS_EXT:
   1284 		progressInfoPtr = (RF_ProgressInfo_t **) data;
   1285 		row = 0; /* XXX we only consider a single row... */
   1286 		if (raidPtr->status[row] != rf_rs_reconstructing) {
   1287 			progressInfo.remaining = 0;
   1288 			progressInfo.completed = 100;
   1289 			progressInfo.total = 100;
   1290 		} else {
   1291 			progressInfo.total =
   1292 				raidPtr->reconControl[row]->numRUsTotal;
   1293 			progressInfo.completed =
   1294 				raidPtr->reconControl[row]->numRUsComplete;
   1295 			progressInfo.remaining = progressInfo.total -
   1296 				progressInfo.completed;
   1297 		}
   1298 		retcode = copyout((caddr_t) &progressInfo,
   1299 				  (caddr_t) *progressInfoPtr,
   1300 				  sizeof(RF_ProgressInfo_t));
   1301 		return (retcode);
   1302 
   1303 	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
   1304 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1305 			/* This makes no sense on a RAID 0, so tell the
   1306 			   user it's done. */
   1307 			*(int *) data = 100;
   1308 			return(0);
   1309 		}
   1310 		if (raidPtr->parity_rewrite_in_progress == 1) {
   1311 			*(int *) data = 100 *
   1312 				raidPtr->parity_rewrite_stripes_done /
   1313 				raidPtr->Layout.numStripe;
   1314 		} else {
   1315 			*(int *) data = 100;
   1316 		}
   1317 		return (0);
   1318 
   1319 	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
   1320 		progressInfoPtr = (RF_ProgressInfo_t **) data;
   1321 		if (raidPtr->parity_rewrite_in_progress == 1) {
   1322 			progressInfo.total = raidPtr->Layout.numStripe;
   1323 			progressInfo.completed =
   1324 				raidPtr->parity_rewrite_stripes_done;
   1325 			progressInfo.remaining = progressInfo.total -
   1326 				progressInfo.completed;
   1327 		} else {
   1328 			progressInfo.remaining = 0;
   1329 			progressInfo.completed = 100;
   1330 			progressInfo.total = 100;
   1331 		}
   1332 		retcode = copyout((caddr_t) &progressInfo,
   1333 				  (caddr_t) *progressInfoPtr,
   1334 				  sizeof(RF_ProgressInfo_t));
   1335 		return (retcode);
   1336 
   1337 	case RAIDFRAME_CHECK_COPYBACK_STATUS:
   1338 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1339 			/* This makes no sense on a RAID 0 */
   1340 			*(int *) data = 100;
   1341 			return(0);
   1342 		}
   1343 		if (raidPtr->copyback_in_progress == 1) {
   1344 			*(int *) data = 100 * raidPtr->copyback_stripes_done /
   1345 				raidPtr->Layout.numStripe;
   1346 		} else {
   1347 			*(int *) data = 100;
   1348 		}
   1349 		return (0);
   1350 
   1351 	case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
   1352 		if (raidPtr->copyback_in_progress == 1) {
   1353 			progressInfo.total = raidPtr->Layout.numStripe;
   1354 			progressInfo.completed =
   1355 				raidPtr->parity_rewrite_stripes_done;
   1356 			progressInfo.remaining = progressInfo.total -
   1357 				progressInfo.completed;
   1358 		} else {
   1359 			progressInfo.remaining = 0;
   1360 			progressInfo.completed = 100;
   1361 			progressInfo.total = 100;
   1362 		}
   1363 		retcode = copyout((caddr_t) &progressInfo,
   1364 				  (caddr_t) *progressInfoPtr,
   1365 				  sizeof(RF_ProgressInfo_t));
   1366 		return (retcode);
   1367 
   1368 		/* the sparetable daemon calls this to wait for the kernel to
   1369 		 * need a spare table. this ioctl does not return until a
   1370 		 * spare table is needed. XXX -- calling mpsleep here in the
   1371 		 * ioctl code is almost certainly wrong and evil. -- XXX XXX
   1372 		 * -- I should either compute the spare table in the kernel,
   1373 		 * or have a different -- XXX XXX -- interface (a different
   1374 		 * character device) for delivering the table     -- XXX */
   1375 #if 0
   1376 	case RAIDFRAME_SPARET_WAIT:
   1377 		RF_LOCK_MUTEX(rf_sparet_wait_mutex);
   1378 		while (!rf_sparet_wait_queue)
   1379 			mpsleep(&rf_sparet_wait_queue, (PZERO + 1) | PCATCH, "sparet wait", 0, (void *) simple_lock_addr(rf_sparet_wait_mutex), MS_LOCK_SIMPLE);
   1380 		waitreq = rf_sparet_wait_queue;
   1381 		rf_sparet_wait_queue = rf_sparet_wait_queue->next;
   1382 		RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
   1383 
   1384 		/* structure assignment */
   1385 		*((RF_SparetWait_t *) data) = *waitreq;
   1386 
   1387 		RF_Free(waitreq, sizeof(*waitreq));
   1388 		return (0);
   1389 
   1390 		/* wakes up a process waiting on SPARET_WAIT and puts an error
   1391 		 * code in it that will cause the daemon to exit */
   1392 	case RAIDFRAME_ABORT_SPARET_WAIT:
   1393 		RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
   1394 		waitreq->fcol = -1;
   1395 		RF_LOCK_MUTEX(rf_sparet_wait_mutex);
   1396 		waitreq->next = rf_sparet_wait_queue;
   1397 		rf_sparet_wait_queue = waitreq;
   1398 		RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
   1399 		wakeup(&rf_sparet_wait_queue);
   1400 		return (0);
   1401 
   1402 		/* used by the spare table daemon to deliver a spare table
   1403 		 * into the kernel */
   1404 	case RAIDFRAME_SEND_SPARET:
   1405 
   1406 		/* install the spare table */
   1407 		retcode = rf_SetSpareTable(raidPtr, *(void **) data);
   1408 
   1409 		/* respond to the requestor.  the return status of the spare
   1410 		 * table installation is passed in the "fcol" field */
   1411 		RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
   1412 		waitreq->fcol = retcode;
   1413 		RF_LOCK_MUTEX(rf_sparet_wait_mutex);
   1414 		waitreq->next = rf_sparet_resp_queue;
   1415 		rf_sparet_resp_queue = waitreq;
   1416 		wakeup(&rf_sparet_resp_queue);
   1417 		RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
   1418 
   1419 		return (retcode);
   1420 #endif
   1421 
   1422 	default:
   1423 		break; /* fall through to the os-specific code below */
   1424 
   1425 	}
   1426 
   1427 	if (!raidPtr->valid)
   1428 		return (EINVAL);
   1429 
   1430 	/*
   1431 	 * Add support for "regular" device ioctls here.
   1432 	 */
   1433 
   1434 	switch (cmd) {
   1435 	case DIOCGDINFO:
   1436 		*(struct disklabel *) data = *(rs->sc_dkdev.dk_label);
   1437 		break;
   1438 
   1439 	case DIOCGPART:
   1440 		((struct partinfo *) data)->disklab = rs->sc_dkdev.dk_label;
   1441 		((struct partinfo *) data)->part =
   1442 		    &rs->sc_dkdev.dk_label->d_partitions[DISKPART(dev)];
   1443 		break;
   1444 
   1445 	case DIOCWDINFO:
   1446 	case DIOCSDINFO:
   1447 		if ((error = raidlock(rs)) != 0)
   1448 			return (error);
   1449 
   1450 		rs->sc_flags |= RAIDF_LABELLING;
   1451 
   1452 		error = setdisklabel(rs->sc_dkdev.dk_label,
   1453 		    (struct disklabel *) data, 0, rs->sc_dkdev.dk_cpulabel);
   1454 		if (error == 0) {
   1455 			if (cmd == DIOCWDINFO)
   1456 				error = writedisklabel(RAIDLABELDEV(dev),
   1457 				    raidstrategy, rs->sc_dkdev.dk_label,
   1458 				    rs->sc_dkdev.dk_cpulabel);
   1459 		}
   1460 		rs->sc_flags &= ~RAIDF_LABELLING;
   1461 
   1462 		raidunlock(rs);
   1463 
   1464 		if (error)
   1465 			return (error);
   1466 		break;
   1467 
   1468 	case DIOCWLABEL:
   1469 		if (*(int *) data != 0)
   1470 			rs->sc_flags |= RAIDF_WLABEL;
   1471 		else
   1472 			rs->sc_flags &= ~RAIDF_WLABEL;
   1473 		break;
   1474 
   1475 	case DIOCGDEFLABEL:
   1476 		raidgetdefaultlabel(raidPtr, rs,
   1477 		    (struct disklabel *) data);
   1478 		break;
   1479 
   1480 	default:
   1481 		retcode = ENOTTY;
   1482 	}
   1483 	return (retcode);
   1484 
   1485 }
   1486 
   1487 
   1488 /* raidinit -- complete the rest of the initialization for the
   1489    RAIDframe device.  */
   1490 
   1491 
   1492 static void
   1493 raidinit(raidPtr)
   1494 	RF_Raid_t *raidPtr;
   1495 {
   1496 	struct raid_softc *rs;
   1497 	int     unit;
   1498 
   1499 	unit = raidPtr->raidid;
   1500 
   1501 	rs = &raid_softc[unit];
   1502 	pool_init(&rs->sc_cbufpool, sizeof(struct raidbuf), 0,
   1503 		  0, 0, "raidpl", 0, NULL, NULL, M_RAIDFRAME);
   1504 
   1505 
   1506 	/* XXX should check return code first... */
   1507 	rs->sc_flags |= RAIDF_INITED;
   1508 
   1509 	sprintf(rs->sc_xname, "raid%d", unit);	/* XXX doesn't check bounds. */
   1510 
   1511 	rs->sc_dkdev.dk_name = rs->sc_xname;
   1512 
   1513 	/* disk_attach actually creates space for the CPU disklabel, among
   1514 	 * other things, so it's critical to call this *BEFORE* we try putzing
   1515 	 * with disklabels. */
   1516 
   1517 	disk_attach(&rs->sc_dkdev);
   1518 
   1519 	/* XXX There may be a weird interaction here between this, and
   1520 	 * protectedSectors, as used in RAIDframe.  */
   1521 
   1522 	rs->sc_size = raidPtr->totalSectors;
   1523 
   1524 }
   1525 
   1526 /* wake up the daemon & tell it to get us a spare table
   1527  * XXX
   1528  * the entries in the queues should be tagged with the raidPtr
   1529  * so that in the extremely rare case that two recons happen at once,
   1530  * we know for which device were requesting a spare table
   1531  * XXX
   1532  *
   1533  * XXX This code is not currently used. GO
   1534  */
   1535 int
   1536 rf_GetSpareTableFromDaemon(req)
   1537 	RF_SparetWait_t *req;
   1538 {
   1539 	int     retcode;
   1540 
   1541 	RF_LOCK_MUTEX(rf_sparet_wait_mutex);
   1542 	req->next = rf_sparet_wait_queue;
   1543 	rf_sparet_wait_queue = req;
   1544 	wakeup(&rf_sparet_wait_queue);
   1545 
   1546 	/* mpsleep unlocks the mutex */
   1547 	while (!rf_sparet_resp_queue) {
   1548 		tsleep(&rf_sparet_resp_queue, PRIBIO,
   1549 		    "raidframe getsparetable", 0);
   1550 	}
   1551 	req = rf_sparet_resp_queue;
   1552 	rf_sparet_resp_queue = req->next;
   1553 	RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
   1554 
   1555 	retcode = req->fcol;
   1556 	RF_Free(req, sizeof(*req));	/* this is not the same req as we
   1557 					 * alloc'd */
   1558 	return (retcode);
   1559 }
   1560 
   1561 /* a wrapper around rf_DoAccess that extracts appropriate info from the
   1562  * bp & passes it down.
   1563  * any calls originating in the kernel must use non-blocking I/O
   1564  * do some extra sanity checking to return "appropriate" error values for
   1565  * certain conditions (to make some standard utilities work)
   1566  *
   1567  * Formerly known as: rf_DoAccessKernel
   1568  */
   1569 void
   1570 raidstart(raidPtr)
   1571 	RF_Raid_t *raidPtr;
   1572 {
   1573 	RF_SectorCount_t num_blocks, pb, sum;
   1574 	RF_RaidAddr_t raid_addr;
   1575 	int     retcode;
   1576 	struct partition *pp;
   1577 	daddr_t blocknum;
   1578 	int     unit;
   1579 	struct raid_softc *rs;
   1580 	int     do_async;
   1581 	struct buf *bp;
   1582 
   1583 	unit = raidPtr->raidid;
   1584 	rs = &raid_softc[unit];
   1585 
   1586 	/* quick check to see if anything has died recently */
   1587 	RF_LOCK_MUTEX(raidPtr->mutex);
   1588 	if (raidPtr->numNewFailures > 0) {
   1589 		rf_update_component_labels(raidPtr);
   1590 		raidPtr->numNewFailures--;
   1591 	}
   1592 	RF_UNLOCK_MUTEX(raidPtr->mutex);
   1593 
   1594 	/* Check to see if we're at the limit... */
   1595 	RF_LOCK_MUTEX(raidPtr->mutex);
   1596 	while (raidPtr->openings > 0) {
   1597 		RF_UNLOCK_MUTEX(raidPtr->mutex);
   1598 
   1599 		/* get the next item, if any, from the queue */
   1600 		if ((bp = BUFQ_FIRST(&rs->buf_queue)) == NULL) {
   1601 			/* nothing more to do */
   1602 			return;
   1603 		}
   1604 		BUFQ_REMOVE(&rs->buf_queue, bp);
   1605 
   1606 		/* Ok, for the bp we have here, bp->b_blkno is relative to the
   1607 		 * partition.. Need to make it absolute to the underlying
   1608 		 * device.. */
   1609 
   1610 		blocknum = bp->b_blkno;
   1611 		if (DISKPART(bp->b_dev) != RAW_PART) {
   1612 			pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)];
   1613 			blocknum += pp->p_offset;
   1614 		}
   1615 
   1616 		db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno,
   1617 			    (int) blocknum));
   1618 
   1619 		db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
   1620 		db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));
   1621 
   1622 		/* *THIS* is where we adjust what block we're going to...
   1623 		 * but DO NOT TOUCH bp->b_blkno!!! */
   1624 		raid_addr = blocknum;
   1625 
   1626 		num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
   1627 		pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
   1628 		sum = raid_addr + num_blocks + pb;
   1629 		if (1 || rf_debugKernelAccess) {
   1630 			db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
   1631 				    (int) raid_addr, (int) sum, (int) num_blocks,
   1632 				    (int) pb, (int) bp->b_resid));
   1633 		}
   1634 		if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
   1635 		    || (sum < num_blocks) || (sum < pb)) {
   1636 			bp->b_error = ENOSPC;
   1637 			bp->b_flags |= B_ERROR;
   1638 			bp->b_resid = bp->b_bcount;
   1639 			biodone(bp);
   1640 			RF_LOCK_MUTEX(raidPtr->mutex);
   1641 			continue;
   1642 		}
   1643 		/*
   1644 		 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
   1645 		 */
   1646 
   1647 		if (bp->b_bcount & raidPtr->sectorMask) {
   1648 			bp->b_error = EINVAL;
   1649 			bp->b_flags |= B_ERROR;
   1650 			bp->b_resid = bp->b_bcount;
   1651 			biodone(bp);
   1652 			RF_LOCK_MUTEX(raidPtr->mutex);
   1653 			continue;
   1654 
   1655 		}
   1656 		db1_printf(("Calling DoAccess..\n"));
   1657 
   1658 
   1659 		RF_LOCK_MUTEX(raidPtr->mutex);
   1660 		raidPtr->openings--;
   1661 		RF_UNLOCK_MUTEX(raidPtr->mutex);
   1662 
   1663 		/*
   1664 		 * Everything is async.
   1665 		 */
   1666 		do_async = 1;
   1667 
   1668 		/* don't ever condition on bp->b_flags & B_WRITE.
   1669 		 * always condition on B_READ instead */
   1670 
   1671 		/* XXX we're still at splbio() here... do we *really*
   1672 		   need to be? */
   1673 
   1674 
   1675 		retcode = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
   1676 				      RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
   1677 				      do_async, raid_addr, num_blocks,
   1678 				      bp->b_data, bp, NULL, NULL,
   1679 				      RF_DAG_NONBLOCKING_IO, NULL, NULL, NULL);
   1680 
   1681 
   1682 		RF_LOCK_MUTEX(raidPtr->mutex);
   1683 	}
   1684 	RF_UNLOCK_MUTEX(raidPtr->mutex);
   1685 }
   1686 
   1687 
   1688 
   1689 
   1690 /* invoke an I/O from kernel mode.  Disk queue should be locked upon entry */
   1691 
   1692 int
   1693 rf_DispatchKernelIO(queue, req)
   1694 	RF_DiskQueue_t *queue;
   1695 	RF_DiskQueueData_t *req;
   1696 {
   1697 	int     op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
   1698 	struct buf *bp;
   1699 	struct raidbuf *raidbp = NULL;
   1700 	struct raid_softc *rs;
   1701 	int     unit;
   1702 	int s;
   1703 
   1704 	s=0;
   1705 	/* s = splbio();*/ /* want to test this */
   1706 	/* XXX along with the vnode, we also need the softc associated with
   1707 	 * this device.. */
   1708 
   1709 	req->queue = queue;
   1710 
   1711 	unit = queue->raidPtr->raidid;
   1712 
   1713 	db1_printf(("DispatchKernelIO unit: %d\n", unit));
   1714 
   1715 	if (unit >= numraid) {
   1716 		printf("Invalid unit number: %d %d\n", unit, numraid);
   1717 		panic("Invalid Unit number in rf_DispatchKernelIO\n");
   1718 	}
   1719 	rs = &raid_softc[unit];
   1720 
   1721 	/* XXX is this the right place? */
   1722 	disk_busy(&rs->sc_dkdev);
   1723 
   1724 	bp = req->bp;
   1725 #if 1
   1726 	/* XXX when there is a physical disk failure, someone is passing us a
   1727 	 * buffer that contains old stuff!!  Attempt to deal with this problem
   1728 	 * without taking a performance hit... (not sure where the real bug
   1729 	 * is.  It's buried in RAIDframe somewhere) :-(  GO ) */
   1730 
   1731 	if (bp->b_flags & B_ERROR) {
   1732 		bp->b_flags &= ~B_ERROR;
   1733 	}
   1734 	if (bp->b_error != 0) {
   1735 		bp->b_error = 0;
   1736 	}
   1737 #endif
   1738 	raidbp = RAIDGETBUF(rs);
   1739 
   1740 	raidbp->rf_flags = 0;	/* XXX not really used anywhere... */
   1741 
   1742 	/*
   1743 	 * context for raidiodone
   1744 	 */
   1745 	raidbp->rf_obp = bp;
   1746 	raidbp->req = req;
   1747 
   1748 	LIST_INIT(&raidbp->rf_buf.b_dep);
   1749 
   1750 	switch (req->type) {
   1751 	case RF_IO_TYPE_NOP:	/* used primarily to unlock a locked queue */
   1752 		/* XXX need to do something extra here.. */
   1753 		/* I'm leaving this in, as I've never actually seen it used,
   1754 		 * and I'd like folks to report it... GO */
   1755 		printf(("WAKEUP CALLED\n"));
   1756 		queue->numOutstanding++;
   1757 
   1758 		/* XXX need to glue the original buffer into this??  */
   1759 
   1760 		KernelWakeupFunc(&raidbp->rf_buf);
   1761 		break;
   1762 
   1763 	case RF_IO_TYPE_READ:
   1764 	case RF_IO_TYPE_WRITE:
   1765 
   1766 		if (req->tracerec) {
   1767 			RF_ETIMER_START(req->tracerec->timer);
   1768 		}
   1769 		InitBP(&raidbp->rf_buf, queue->rf_cinfo->ci_vp,
   1770 		    op | bp->b_flags, queue->rf_cinfo->ci_dev,
   1771 		    req->sectorOffset, req->numSector,
   1772 		    req->buf, KernelWakeupFunc, (void *) req,
   1773 		    queue->raidPtr->logBytesPerSector, req->b_proc);
   1774 
   1775 		if (rf_debugKernelAccess) {
   1776 			db1_printf(("dispatch: bp->b_blkno = %ld\n",
   1777 				(long) bp->b_blkno));
   1778 		}
   1779 		queue->numOutstanding++;
   1780 		queue->last_deq_sector = req->sectorOffset;
   1781 		/* acc wouldn't have been let in if there were any pending
   1782 		 * reqs at any other priority */
   1783 		queue->curPriority = req->priority;
   1784 
   1785 		db1_printf(("Going for %c to unit %d row %d col %d\n",
   1786 			req->type, unit, queue->row, queue->col));
   1787 		db1_printf(("sector %d count %d (%d bytes) %d\n",
   1788 			(int) req->sectorOffset, (int) req->numSector,
   1789 			(int) (req->numSector <<
   1790 			    queue->raidPtr->logBytesPerSector),
   1791 			(int) queue->raidPtr->logBytesPerSector));
   1792 		if ((raidbp->rf_buf.b_flags & B_READ) == 0) {
   1793 			raidbp->rf_buf.b_vp->v_numoutput++;
   1794 		}
   1795 		VOP_STRATEGY(&raidbp->rf_buf);
   1796 
   1797 		break;
   1798 
   1799 	default:
   1800 		panic("bad req->type in rf_DispatchKernelIO");
   1801 	}
   1802 	db1_printf(("Exiting from DispatchKernelIO\n"));
   1803 	/* splx(s); */ /* want to test this */
   1804 	return (0);
   1805 }
   1806 /* this is the callback function associated with a I/O invoked from
   1807    kernel code.
   1808  */
   1809 static void
   1810 KernelWakeupFunc(vbp)
   1811 	struct buf *vbp;
   1812 {
   1813 	RF_DiskQueueData_t *req = NULL;
   1814 	RF_DiskQueue_t *queue;
   1815 	struct raidbuf *raidbp = (struct raidbuf *) vbp;
   1816 	struct buf *bp;
   1817 	struct raid_softc *rs;
   1818 	int     unit;
   1819 	int s;
   1820 
   1821 	s = splbio();
   1822 	db1_printf(("recovering the request queue:\n"));
   1823 	req = raidbp->req;
   1824 
   1825 	bp = raidbp->rf_obp;
   1826 
   1827 	queue = (RF_DiskQueue_t *) req->queue;
   1828 
   1829 	if (raidbp->rf_buf.b_flags & B_ERROR) {
   1830 		bp->b_flags |= B_ERROR;
   1831 		bp->b_error = raidbp->rf_buf.b_error ?
   1832 		    raidbp->rf_buf.b_error : EIO;
   1833 	}
   1834 
   1835 	/* XXX methinks this could be wrong... */
   1836 #if 1
   1837 	bp->b_resid = raidbp->rf_buf.b_resid;
   1838 #endif
   1839 
   1840 	if (req->tracerec) {
   1841 		RF_ETIMER_STOP(req->tracerec->timer);
   1842 		RF_ETIMER_EVAL(req->tracerec->timer);
   1843 		RF_LOCK_MUTEX(rf_tracing_mutex);
   1844 		req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
   1845 		req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
   1846 		req->tracerec->num_phys_ios++;
   1847 		RF_UNLOCK_MUTEX(rf_tracing_mutex);
   1848 	}
   1849 	bp->b_bcount = raidbp->rf_buf.b_bcount;	/* XXXX ?? */
   1850 
   1851 	unit = queue->raidPtr->raidid;	/* *Much* simpler :-> */
   1852 
   1853 
   1854 	/* XXX Ok, let's get aggressive... If B_ERROR is set, let's go
   1855 	 * ballistic, and mark the component as hosed... */
   1856 
   1857 	if (bp->b_flags & B_ERROR) {
   1858 		/* Mark the disk as dead */
   1859 		/* but only mark it once... */
   1860 		if (queue->raidPtr->Disks[queue->row][queue->col].status ==
   1861 		    rf_ds_optimal) {
   1862 			printf("raid%d: IO Error.  Marking %s as failed.\n",
   1863 			    unit, queue->raidPtr->Disks[queue->row][queue->col].devname);
   1864 			queue->raidPtr->Disks[queue->row][queue->col].status =
   1865 			    rf_ds_failed;
   1866 			queue->raidPtr->status[queue->row] = rf_rs_degraded;
   1867 			queue->raidPtr->numFailures++;
   1868 			queue->raidPtr->numNewFailures++;
   1869 			/* XXX here we should bump the version number for each component, and write that data out */
   1870 		} else {	/* Disk is already dead... */
   1871 			/* printf("Disk already marked as dead!\n"); */
   1872 		}
   1873 
   1874 	}
   1875 
   1876 	rs = &raid_softc[unit];
   1877 	RAIDPUTBUF(rs, raidbp);
   1878 
   1879 
   1880 	if (bp->b_resid == 0) {
   1881 		/* XXX is this the right place for a disk_unbusy()??!??!?!? */
   1882 		disk_unbusy(&rs->sc_dkdev, (bp->b_bcount - bp->b_resid));
   1883 	}
   1884 
   1885 	rf_DiskIOComplete(queue, req, (bp->b_flags & B_ERROR) ? 1 : 0);
   1886 	(req->CompleteFunc) (req->argument, (bp->b_flags & B_ERROR) ? 1 : 0);
   1887 
   1888 	splx(s);
   1889 }
   1890 
   1891 
   1892 
   1893 /*
   1894  * initialize a buf structure for doing an I/O in the kernel.
   1895  */
   1896 static void
   1897 InitBP(bp, b_vp, rw_flag, dev, startSect, numSect, buf, cbFunc, cbArg,
   1898        logBytesPerSector, b_proc)
   1899 	struct buf *bp;
   1900 	struct vnode *b_vp;
   1901 	unsigned rw_flag;
   1902 	dev_t dev;
   1903 	RF_SectorNum_t startSect;
   1904 	RF_SectorCount_t numSect;
   1905 	caddr_t buf;
   1906 	void (*cbFunc) (struct buf *);
   1907 	void *cbArg;
   1908 	int logBytesPerSector;
   1909 	struct proc *b_proc;
   1910 {
   1911 	/* bp->b_flags       = B_PHYS | rw_flag; */
   1912 	bp->b_flags = B_CALL | rw_flag;	/* XXX need B_PHYS here too??? */
   1913 	bp->b_bcount = numSect << logBytesPerSector;
   1914 	bp->b_bufsize = bp->b_bcount;
   1915 	bp->b_error = 0;
   1916 	bp->b_dev = dev;
   1917 	bp->b_data = buf;
   1918 	bp->b_blkno = startSect;
   1919 	bp->b_resid = bp->b_bcount;	/* XXX is this right!??!?!! */
   1920 	if (bp->b_bcount == 0) {
   1921 		panic("bp->b_bcount is zero in InitBP!!\n");
   1922 	}
   1923 	bp->b_proc = b_proc;
   1924 	bp->b_iodone = cbFunc;
   1925 	bp->b_vp = b_vp;
   1926 
   1927 }
   1928 
/*
 * Fabricate a default disklabel for a RAID set in *lp.
 *
 *   raidPtr  supplies total sectors, bytes per sector and the number of
 *            data sectors per stripe
 *   rs       softc of the unit (kept for interface compatibility; the
 *            label is built purely from raidPtr)
 *   lp       label to fill in; it is zeroed first
 *
 * The geometry is fictitious: one track per cylinder, one stripe of data
 * sectors per track.  Only the raw partition is defined.
 */
static void
raidgetdefaultlabel(struct RF_Raid_s_placeholder_unused *, struct raid_softc *,
    struct disklabel *);
   1964 /*
   1965  * Read the disklabel from the raid device.  If one is not present, fake one
   1966  * up.
   1967  */
   1968 static void
   1969 raidgetdisklabel(dev)
   1970 	dev_t   dev;
   1971 {
   1972 	int     unit = raidunit(dev);
   1973 	struct raid_softc *rs = &raid_softc[unit];
   1974 	char   *errstring;
   1975 	struct disklabel *lp = rs->sc_dkdev.dk_label;
   1976 	struct cpu_disklabel *clp = rs->sc_dkdev.dk_cpulabel;
   1977 	RF_Raid_t *raidPtr;
   1978 
   1979 	db1_printf(("Getting the disklabel...\n"));
   1980 
   1981 	bzero(clp, sizeof(*clp));
   1982 
   1983 	raidPtr = raidPtrs[unit];
   1984 
   1985 	raidgetdefaultlabel(raidPtr, rs, lp);
   1986 
   1987 	/*
   1988 	 * Call the generic disklabel extraction routine.
   1989 	 */
   1990 	errstring = readdisklabel(RAIDLABELDEV(dev), raidstrategy,
   1991 	    rs->sc_dkdev.dk_label, rs->sc_dkdev.dk_cpulabel);
   1992 	if (errstring)
   1993 		raidmakedisklabel(rs);
   1994 	else {
   1995 		int     i;
   1996 		struct partition *pp;
   1997 
   1998 		/*
   1999 		 * Sanity check whether the found disklabel is valid.
   2000 		 *
   2001 		 * This is necessary since total size of the raid device
   2002 		 * may vary when an interleave is changed even though exactly
   2003 		 * same componets are used, and old disklabel may used
   2004 		 * if that is found.
   2005 		 */
   2006 		if (lp->d_secperunit != rs->sc_size)
   2007 			printf("WARNING: %s: "
   2008 			    "total sector size in disklabel (%d) != "
   2009 			    "the size of raid (%ld)\n", rs->sc_xname,
   2010 			    lp->d_secperunit, (long) rs->sc_size);
   2011 		for (i = 0; i < lp->d_npartitions; i++) {
   2012 			pp = &lp->d_partitions[i];
   2013 			if (pp->p_offset + pp->p_size > rs->sc_size)
   2014 				printf("WARNING: %s: end of partition `%c' "
   2015 				    "exceeds the size of raid (%ld)\n",
   2016 				    rs->sc_xname, 'a' + i, (long) rs->sc_size);
   2017 		}
   2018 	}
   2019 
   2020 }
   2021 /*
   2022  * Take care of things one might want to take care of in the event
   2023  * that a disklabel isn't present.
   2024  */
   2025 static void
   2026 raidmakedisklabel(rs)
   2027 	struct raid_softc *rs;
   2028 {
   2029 	struct disklabel *lp = rs->sc_dkdev.dk_label;
   2030 	db1_printf(("Making a label..\n"));
   2031 
   2032 	/*
   2033 	 * For historical reasons, if there's no disklabel present
   2034 	 * the raw partition must be marked FS_BSDFFS.
   2035 	 */
   2036 
   2037 	lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;
   2038 
   2039 	strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));
   2040 
   2041 	lp->d_checksum = dkcksum(lp);
   2042 }
   2043 /*
   2044  * Lookup the provided name in the filesystem.  If the file exists,
   2045  * is a valid block device, and isn't being used by anyone else,
   2046  * set *vpp to the file's vnode.
   2047  * You'll find the original of this in ccd.c
   2048  */
   2049 int
   2050 raidlookup(path, p, vpp)
   2051 	char   *path;
   2052 	struct proc *p;
   2053 	struct vnode **vpp;	/* result */
   2054 {
   2055 	struct nameidata nd;
   2056 	struct vnode *vp;
   2057 	struct vattr va;
   2058 	int     error;
   2059 
   2060 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, path, p);
   2061 	if ((error = vn_open(&nd, FREAD | FWRITE, 0)) != 0) {
   2062 #ifdef DEBUG
   2063 		printf("RAIDframe: vn_open returned %d\n", error);
   2064 #endif
   2065 		return (error);
   2066 	}
   2067 	vp = nd.ni_vp;
   2068 	if (vp->v_usecount > 1) {
   2069 		VOP_UNLOCK(vp, 0);
   2070 		(void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
   2071 		return (EBUSY);
   2072 	}
   2073 	if ((error = VOP_GETATTR(vp, &va, p->p_ucred, p)) != 0) {
   2074 		VOP_UNLOCK(vp, 0);
   2075 		(void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
   2076 		return (error);
   2077 	}
   2078 	/* XXX: eventually we should handle VREG, too. */
   2079 	if (va.va_type != VBLK) {
   2080 		VOP_UNLOCK(vp, 0);
   2081 		(void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
   2082 		return (ENOTBLK);
   2083 	}
   2084 	VOP_UNLOCK(vp, 0);
   2085 	*vpp = vp;
   2086 	return (0);
   2087 }
   2088 /*
   2089  * Wait interruptibly for an exclusive lock.
   2090  *
   2091  * XXX
   2092  * Several drivers do this; it should be abstracted and made MP-safe.
   2093  * (Hmm... where have we seen this warning before :->  GO )
   2094  */
   2095 static int
   2096 raidlock(rs)
   2097 	struct raid_softc *rs;
   2098 {
   2099 	int     error;
   2100 
   2101 	while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
   2102 		rs->sc_flags |= RAIDF_WANTED;
   2103 		if ((error =
   2104 			tsleep(rs, PRIBIO | PCATCH, "raidlck", 0)) != 0)
   2105 			return (error);
   2106 	}
   2107 	rs->sc_flags |= RAIDF_LOCKED;
   2108 	return (0);
   2109 }
   2110 /*
   2111  * Unlock and wake up any waiters.
   2112  */
   2113 static void
   2114 raidunlock(rs)
   2115 	struct raid_softc *rs;
   2116 {
   2117 
   2118 	rs->sc_flags &= ~RAIDF_LOCKED;
   2119 	if ((rs->sc_flags & RAIDF_WANTED) != 0) {
   2120 		rs->sc_flags &= ~RAIDF_WANTED;
   2121 		wakeup(rs);
   2122 	}
   2123 }
   2124 
   2125 
/*
 * Byte offset and byte size of the component label area on a component,
 * as used by raidread_component_label() and raidwrite_component_label().
 */
#define RF_COMPONENT_INFO_OFFSET  16384 /* bytes */
#define RF_COMPONENT_INFO_SIZE     1024 /* bytes */
   2128 
   2129 int
   2130 raidmarkclean(dev_t dev, struct vnode *b_vp, int mod_counter)
   2131 {
   2132 	RF_ComponentLabel_t clabel;
   2133 	raidread_component_label(dev, b_vp, &clabel);
   2134 	clabel.mod_counter = mod_counter;
   2135 	clabel.clean = RF_RAID_CLEAN;
   2136 	raidwrite_component_label(dev, b_vp, &clabel);
   2137 	return(0);
   2138 }
   2139 
   2140 
   2141 int
   2142 raidmarkdirty(dev_t dev, struct vnode *b_vp, int mod_counter)
   2143 {
   2144 	RF_ComponentLabel_t clabel;
   2145 	raidread_component_label(dev, b_vp, &clabel);
   2146 	clabel.mod_counter = mod_counter;
   2147 	clabel.clean = RF_RAID_DIRTY;
   2148 	raidwrite_component_label(dev, b_vp, &clabel);
   2149 	return(0);
   2150 }
   2151 
   2152 /* ARGSUSED */
   2153 int
   2154 raidread_component_label(dev, b_vp, clabel)
   2155 	dev_t dev;
   2156 	struct vnode *b_vp;
   2157 	RF_ComponentLabel_t *clabel;
   2158 {
   2159 	struct buf *bp;
   2160 	int error;
   2161 
   2162 	/* XXX should probably ensure that we don't try to do this if
   2163 	   someone has changed rf_protected_sectors. */
   2164 
   2165 	/* get a block of the appropriate size... */
   2166 	bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
   2167 	bp->b_dev = dev;
   2168 
   2169 	/* get our ducks in a row for the read */
   2170 	bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
   2171 	bp->b_bcount = RF_COMPONENT_INFO_SIZE;
   2172 	bp->b_flags = B_BUSY | B_READ;
   2173  	bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
   2174 
   2175 	(*bdevsw[major(bp->b_dev)].d_strategy)(bp);
   2176 
   2177 	error = biowait(bp);
   2178 
   2179 	if (!error) {
   2180 		memcpy(clabel, bp->b_data,
   2181 		       sizeof(RF_ComponentLabel_t));
   2182 #if 0
   2183 		rf_print_component_label( clabel );
   2184 #endif
   2185         } else {
   2186 #if 0
   2187 		printf("Failed to read RAID component label!\n");
   2188 #endif
   2189 	}
   2190 
   2191         bp->b_flags = B_INVAL | B_AGE;
   2192 	brelse(bp);
   2193 	return(error);
   2194 }
   2195 /* ARGSUSED */
   2196 int
   2197 raidwrite_component_label(dev, b_vp, clabel)
   2198 	dev_t dev;
   2199 	struct vnode *b_vp;
   2200 	RF_ComponentLabel_t *clabel;
   2201 {
   2202 	struct buf *bp;
   2203 	int error;
   2204 
   2205 	/* get a block of the appropriate size... */
   2206 	bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
   2207 	bp->b_dev = dev;
   2208 
   2209 	/* get our ducks in a row for the write */
   2210 	bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
   2211 	bp->b_bcount = RF_COMPONENT_INFO_SIZE;
   2212 	bp->b_flags = B_BUSY | B_WRITE;
   2213  	bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
   2214 
   2215 	memset(bp->b_data, 0, RF_COMPONENT_INFO_SIZE );
   2216 
   2217 	memcpy(bp->b_data, clabel, sizeof(RF_ComponentLabel_t));
   2218 
   2219 	(*bdevsw[major(bp->b_dev)].d_strategy)(bp);
   2220 	error = biowait(bp);
   2221         bp->b_flags = B_INVAL | B_AGE;
   2222 	brelse(bp);
   2223 	if (error) {
   2224 #if 1
   2225 		printf("Failed to write RAID component info!\n");
   2226 #endif
   2227 	}
   2228 
   2229 	return(error);
   2230 }
   2231 
   2232 void
   2233 rf_markalldirty(raidPtr)
   2234 	RF_Raid_t *raidPtr;
   2235 {
   2236 	RF_ComponentLabel_t clabel;
   2237 	int r,c;
   2238 
   2239 	raidPtr->mod_counter++;
   2240 	for (r = 0; r < raidPtr->numRow; r++) {
   2241 		for (c = 0; c < raidPtr->numCol; c++) {
   2242 			if (raidPtr->Disks[r][c].status != rf_ds_failed) {
   2243 				raidread_component_label(
   2244 					raidPtr->Disks[r][c].dev,
   2245 					raidPtr->raid_cinfo[r][c].ci_vp,
   2246 					&clabel);
   2247 				if (clabel.status == rf_ds_spared) {
   2248 					/* XXX do something special...
   2249 					 but whatever you do, don't
   2250 					 try to access it!! */
   2251 				} else {
   2252 #if 0
   2253 				clabel.status =
   2254 					raidPtr->Disks[r][c].status;
   2255 				raidwrite_component_label(
   2256 					raidPtr->Disks[r][c].dev,
   2257 					raidPtr->raid_cinfo[r][c].ci_vp,
   2258 					&clabel);
   2259 #endif
   2260 				raidmarkdirty(
   2261 				       raidPtr->Disks[r][c].dev,
   2262 				       raidPtr->raid_cinfo[r][c].ci_vp,
   2263 				       raidPtr->mod_counter);
   2264 				}
   2265 			}
   2266 		}
   2267 	}
   2268 	/* printf("Component labels marked dirty.\n"); */
   2269 #if 0
   2270 	for( c = 0; c < raidPtr->numSpare ; c++) {
   2271 		sparecol = raidPtr->numCol + c;
   2272 		if (raidPtr->Disks[r][sparecol].status == rf_ds_used_spare) {
   2273 			/*
   2274 
   2275 			   XXX this is where we get fancy and map this spare
   2276 			   into it's correct spot in the array.
   2277 
   2278 			 */
   2279 			/*
   2280 
   2281 			   we claim this disk is "optimal" if it's
   2282 			   rf_ds_used_spare, as that means it should be
   2283 			   directly substitutable for the disk it replaced.
   2284 			   We note that too...
   2285 
   2286 			 */
   2287 
   2288 			for(i=0;i<raidPtr->numRow;i++) {
   2289 				for(j=0;j<raidPtr->numCol;j++) {
   2290 					if ((raidPtr->Disks[i][j].spareRow ==
   2291 					     r) &&
   2292 					    (raidPtr->Disks[i][j].spareCol ==
   2293 					     sparecol)) {
   2294 						srow = r;
   2295 						scol = sparecol;
   2296 						break;
   2297 					}
   2298 				}
   2299 			}
   2300 
   2301 			raidread_component_label(
   2302 				      raidPtr->Disks[r][sparecol].dev,
   2303 				      raidPtr->raid_cinfo[r][sparecol].ci_vp,
   2304 				      &clabel);
   2305 			/* make sure status is noted */
   2306 			clabel.version = RF_COMPONENT_LABEL_VERSION;
   2307 			clabel.mod_counter = raidPtr->mod_counter;
   2308 			clabel.serial_number = raidPtr->serial_number;
   2309 			clabel.row = srow;
   2310 			clabel.column = scol;
   2311 			clabel.num_rows = raidPtr->numRow;
   2312 			clabel.num_columns = raidPtr->numCol;
   2313 			clabel.clean = RF_RAID_DIRTY; /* changed in a bit*/
   2314 			clabel.status = rf_ds_optimal;
   2315 			raidwrite_component_label(
   2316 				      raidPtr->Disks[r][sparecol].dev,
   2317 				      raidPtr->raid_cinfo[r][sparecol].ci_vp,
   2318 				      &clabel);
   2319 			raidmarkclean( raidPtr->Disks[r][sparecol].dev,
   2320 			              raidPtr->raid_cinfo[r][sparecol].ci_vp);
   2321 		}
   2322 	}
   2323 
   2324 #endif
   2325 }
   2326 
   2327 
   2328 void
   2329 rf_update_component_labels(raidPtr)
   2330 	RF_Raid_t *raidPtr;
   2331 {
   2332 	RF_ComponentLabel_t clabel;
   2333 	int sparecol;
   2334 	int r,c;
   2335 	int i,j;
   2336 	int srow, scol;
   2337 
   2338 	srow = -1;
   2339 	scol = -1;
   2340 
   2341 	/* XXX should do extra checks to make sure things really are clean,
   2342 	   rather than blindly setting the clean bit... */
   2343 
   2344 	raidPtr->mod_counter++;
   2345 
   2346 	for (r = 0; r < raidPtr->numRow; r++) {
   2347 		for (c = 0; c < raidPtr->numCol; c++) {
   2348 			if (raidPtr->Disks[r][c].status == rf_ds_optimal) {
   2349 				raidread_component_label(
   2350 					raidPtr->Disks[r][c].dev,
   2351 					raidPtr->raid_cinfo[r][c].ci_vp,
   2352 					&clabel);
   2353 				/* make sure status is noted */
   2354 				clabel.status = rf_ds_optimal;
   2355 				/* bump the counter */
   2356 				clabel.mod_counter = raidPtr->mod_counter;
   2357 
   2358 				raidwrite_component_label(
   2359 					raidPtr->Disks[r][c].dev,
   2360 					raidPtr->raid_cinfo[r][c].ci_vp,
   2361 					&clabel);
   2362 			}
   2363 			/* else we don't touch it.. */
   2364 		}
   2365 	}
   2366 
   2367 	for( c = 0; c < raidPtr->numSpare ; c++) {
   2368 		sparecol = raidPtr->numCol + c;
   2369 		if (raidPtr->Disks[0][sparecol].status == rf_ds_used_spare) {
   2370 			/*
   2371 
   2372 			   we claim this disk is "optimal" if it's
   2373 			   rf_ds_used_spare, as that means it should be
   2374 			   directly substitutable for the disk it replaced.
   2375 			   We note that too...
   2376 
   2377 			 */
   2378 
   2379 			for(i=0;i<raidPtr->numRow;i++) {
   2380 				for(j=0;j<raidPtr->numCol;j++) {
   2381 					if ((raidPtr->Disks[i][j].spareRow ==
   2382 					     0) &&
   2383 					    (raidPtr->Disks[i][j].spareCol ==
   2384 					     sparecol)) {
   2385 						srow = i;
   2386 						scol = j;
   2387 						break;
   2388 					}
   2389 				}
   2390 			}
   2391 
   2392 			/* XXX shouldn't *really* need this... */
   2393 			raidread_component_label(
   2394 				      raidPtr->Disks[0][sparecol].dev,
   2395 				      raidPtr->raid_cinfo[0][sparecol].ci_vp,
   2396 				      &clabel);
   2397 			/* make sure status is noted */
   2398 
   2399 			raid_init_component_label(raidPtr, &clabel);
   2400 
   2401 			clabel.mod_counter = raidPtr->mod_counter;
   2402 			clabel.row = srow;
   2403 			clabel.column = scol;
   2404 			clabel.status = rf_ds_optimal;
   2405 
   2406 			raidwrite_component_label(
   2407 				      raidPtr->Disks[0][sparecol].dev,
   2408 				      raidPtr->raid_cinfo[0][sparecol].ci_vp,
   2409 				      &clabel);
   2410 		}
   2411 	}
   2412 	/* 	printf("Component labels updated\n"); */
   2413 }
   2414 
   2415 
   2416 void
   2417 rf_final_update_component_labels(raidPtr)
   2418 	RF_Raid_t *raidPtr;
   2419 {
   2420 	RF_ComponentLabel_t clabel;
   2421 	int sparecol;
   2422 	int r,c;
   2423 	int i,j;
   2424 	int srow, scol;
   2425 
   2426 	srow = -1;
   2427 	scol = -1;
   2428 
   2429 	/* XXX should do extra checks to make sure things really are clean,
   2430 	   rather than blindly setting the clean bit... */
   2431 
   2432 	raidPtr->mod_counter++;
   2433 
   2434 	for (r = 0; r < raidPtr->numRow; r++) {
   2435 		for (c = 0; c < raidPtr->numCol; c++) {
   2436 			if (raidPtr->Disks[r][c].status == rf_ds_optimal) {
   2437 				raidread_component_label(
   2438 					raidPtr->Disks[r][c].dev,
   2439 					raidPtr->raid_cinfo[r][c].ci_vp,
   2440 					&clabel);
   2441 				/* make sure status is noted */
   2442 				clabel.status = rf_ds_optimal;
   2443 				/* bump the counter */
   2444 				clabel.mod_counter = raidPtr->mod_counter;
   2445 
   2446 				raidwrite_component_label(
   2447 					raidPtr->Disks[r][c].dev,
   2448 					raidPtr->raid_cinfo[r][c].ci_vp,
   2449 					&clabel);
   2450 				if (raidPtr->parity_good == RF_RAID_CLEAN) {
   2451 					raidmarkclean(
   2452 					      raidPtr->Disks[r][c].dev,
   2453 					      raidPtr->raid_cinfo[r][c].ci_vp,
   2454 					      raidPtr->mod_counter);
   2455 				}
   2456 			}
   2457 			/* else we don't touch it.. */
   2458 		}
   2459 	}
   2460 
   2461 	for( c = 0; c < raidPtr->numSpare ; c++) {
   2462 		sparecol = raidPtr->numCol + c;
   2463 		if (raidPtr->Disks[0][sparecol].status == rf_ds_used_spare) {
   2464 			/*
   2465 
   2466 			   we claim this disk is "optimal" if it's
   2467 			   rf_ds_used_spare, as that means it should be
   2468 			   directly substitutable for the disk it replaced.
   2469 			   We note that too...
   2470 
   2471 			 */
   2472 
   2473 			for(i=0;i<raidPtr->numRow;i++) {
   2474 				for(j=0;j<raidPtr->numCol;j++) {
   2475 					if ((raidPtr->Disks[i][j].spareRow ==
   2476 					     0) &&
   2477 					    (raidPtr->Disks[i][j].spareCol ==
   2478 					     sparecol)) {
   2479 						srow = i;
   2480 						scol = j;
   2481 						break;
   2482 					}
   2483 				}
   2484 			}
   2485 
   2486 			/* XXX shouldn't *really* need this... */
   2487 			raidread_component_label(
   2488 				      raidPtr->Disks[0][sparecol].dev,
   2489 				      raidPtr->raid_cinfo[0][sparecol].ci_vp,
   2490 				      &clabel);
   2491 			/* make sure status is noted */
   2492 
   2493 			raid_init_component_label(raidPtr, &clabel);
   2494 
   2495 			clabel.mod_counter = raidPtr->mod_counter;
   2496 			clabel.row = srow;
   2497 			clabel.column = scol;
   2498 			clabel.status = rf_ds_optimal;
   2499 
   2500 			raidwrite_component_label(
   2501 				      raidPtr->Disks[0][sparecol].dev,
   2502 				      raidPtr->raid_cinfo[0][sparecol].ci_vp,
   2503 				      &clabel);
   2504 			if (raidPtr->parity_good == RF_RAID_CLEAN) {
   2505 				raidmarkclean( raidPtr->Disks[0][sparecol].dev,
   2506 			              raidPtr->raid_cinfo[0][sparecol].ci_vp,
   2507 					       raidPtr->mod_counter);
   2508 			}
   2509 		}
   2510 	}
   2511 	/* 	printf("Component labels updated\n"); */
   2512 }
   2513 
   2514 void
   2515 rf_close_component(raidPtr, vp, auto_configured)
   2516 	RF_Raid_t *raidPtr;
   2517 	struct vnode *vp;
   2518 	int auto_configured;
   2519 {
   2520 	struct proc *p;
   2521 
   2522 	p = raidPtr->engine_thread;
   2523 
   2524 	if (vp != NULL) {
   2525 		if (auto_configured == 1) {
   2526 			VOP_CLOSE(vp, FREAD, NOCRED, 0);
   2527 			vput(vp);
   2528 
   2529 		} else {
   2530 			VOP_UNLOCK(vp, 0);
   2531 			(void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
   2532 		}
   2533 	} else {
   2534 		printf("vnode was NULL\n");
   2535 	}
   2536 }
   2537 
   2538 
   2539 void
   2540 rf_UnconfigureVnodes(raidPtr)
   2541 	RF_Raid_t *raidPtr;
   2542 {
   2543 	int r,c;
   2544 	struct proc *p;
   2545 	struct vnode *vp;
   2546 	int acd;
   2547 
   2548 
   2549 	/* We take this opportunity to close the vnodes like we should.. */
   2550 
   2551 	p = raidPtr->engine_thread;
   2552 
   2553 	for (r = 0; r < raidPtr->numRow; r++) {
   2554 		for (c = 0; c < raidPtr->numCol; c++) {
   2555 			printf("Closing vnode for row: %d col: %d\n", r, c);
   2556 			vp = raidPtr->raid_cinfo[r][c].ci_vp;
   2557 			acd = raidPtr->Disks[r][c].auto_configured;
   2558 			rf_close_component(raidPtr, vp, acd);
   2559 			raidPtr->raid_cinfo[r][c].ci_vp = NULL;
   2560 			raidPtr->Disks[r][c].auto_configured = 0;
   2561 		}
   2562 	}
   2563 	for (r = 0; r < raidPtr->numSpare; r++) {
   2564 		printf("Closing vnode for spare: %d\n", r);
   2565 		vp = raidPtr->raid_cinfo[0][raidPtr->numCol + r].ci_vp;
   2566 		acd = raidPtr->Disks[0][raidPtr->numCol + r].auto_configured;
   2567 		rf_close_component(raidPtr, vp, acd);
   2568 		raidPtr->raid_cinfo[0][raidPtr->numCol + r].ci_vp = NULL;
   2569 		raidPtr->Disks[0][raidPtr->numCol + r].auto_configured = 0;
   2570 	}
   2571 }
   2572 
   2573 
   2574 void
   2575 rf_ReconThread(req)
   2576 	struct rf_recon_req *req;
   2577 {
   2578 	int     s;
   2579 	RF_Raid_t *raidPtr;
   2580 
   2581 	s = splbio();
   2582 	raidPtr = (RF_Raid_t *) req->raidPtr;
   2583 	raidPtr->recon_in_progress = 1;
   2584 
   2585 	rf_FailDisk((RF_Raid_t *) req->raidPtr, req->row, req->col,
   2586 		    ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));
   2587 
   2588 	/* XXX get rid of this! we don't need it at all.. */
   2589 	RF_Free(req, sizeof(*req));
   2590 
   2591 	raidPtr->recon_in_progress = 0;
   2592 	splx(s);
   2593 
   2594 	/* That's all... */
   2595 	kthread_exit(0);        /* does not return */
   2596 }
   2597 
   2598 void
   2599 rf_RewriteParityThread(raidPtr)
   2600 	RF_Raid_t *raidPtr;
   2601 {
   2602 	int retcode;
   2603 	int s;
   2604 
   2605 	raidPtr->parity_rewrite_in_progress = 1;
   2606 	s = splbio();
   2607 	retcode = rf_RewriteParity(raidPtr);
   2608 	splx(s);
   2609 	if (retcode) {
   2610 		printf("raid%d: Error re-writing parity!\n",raidPtr->raidid);
   2611 	} else {
   2612 		/* set the clean bit!  If we shutdown correctly,
   2613 		   the clean bit on each component label will get
   2614 		   set */
   2615 		raidPtr->parity_good = RF_RAID_CLEAN;
   2616 	}
   2617 	raidPtr->parity_rewrite_in_progress = 0;
   2618 
   2619 	/* Anyone waiting for us to stop?  If so, inform them... */
   2620 	if (raidPtr->waitShutdown) {
   2621 		wakeup(&raidPtr->parity_rewrite_in_progress);
   2622 	}
   2623 
   2624 	/* That's all... */
   2625 	kthread_exit(0);        /* does not return */
   2626 }
   2627 
   2628 
   2629 void
   2630 rf_CopybackThread(raidPtr)
   2631 	RF_Raid_t *raidPtr;
   2632 {
   2633 	int s;
   2634 
   2635 	raidPtr->copyback_in_progress = 1;
   2636 	s = splbio();
   2637 	rf_CopybackReconstructedData(raidPtr);
   2638 	splx(s);
   2639 	raidPtr->copyback_in_progress = 0;
   2640 
   2641 	/* That's all... */
   2642 	kthread_exit(0);        /* does not return */
   2643 }
   2644 
   2645 
   2646 void
   2647 rf_ReconstructInPlaceThread(req)
   2648 	struct rf_recon_req *req;
   2649 {
   2650 	int retcode;
   2651 	int s;
   2652 	RF_Raid_t *raidPtr;
   2653 
   2654 	s = splbio();
   2655 	raidPtr = req->raidPtr;
   2656 	raidPtr->recon_in_progress = 1;
   2657 	retcode = rf_ReconstructInPlace(raidPtr, req->row, req->col);
   2658 	RF_Free(req, sizeof(*req));
   2659 	raidPtr->recon_in_progress = 0;
   2660 	splx(s);
   2661 
   2662 	/* That's all... */
   2663 	kthread_exit(0);        /* does not return */
   2664 }
   2665 
/*
 * Mount-root hook for the raid device.  Intentionally does nothing;
 * dev is ignored.
 */
void
rf_mountroot_hook(struct device *dev)
{
	/* nothing to do */
}
   2672 
   2673 
   2674 RF_AutoConfig_t *
   2675 rf_find_raid_components()
   2676 {
   2677 	struct devnametobdevmaj *dtobdm;
   2678 	struct vnode *vp;
   2679 	struct disklabel label;
   2680 	struct device *dv;
   2681 	char *cd_name;
   2682 	dev_t dev;
   2683 	int error;
   2684 	int i;
   2685 	int good_one;
   2686 	RF_ComponentLabel_t *clabel;
   2687 	RF_AutoConfig_t *ac_list;
   2688 	RF_AutoConfig_t *ac;
   2689 
   2690 
   2691 	/* initialize the AutoConfig list */
   2692 	ac_list = NULL;
   2693 
   2694 if (raidautoconfig) {
   2695 
   2696 	/* we begin by trolling through *all* the devices on the system */
   2697 
   2698 	for (dv = alldevs.tqh_first; dv != NULL;
   2699 	     dv = dv->dv_list.tqe_next) {
   2700 
   2701 		/* we are only interested in disks... */
   2702 		if (dv->dv_class != DV_DISK)
   2703 			continue;
   2704 
   2705 		/* we don't care about floppies... */
   2706 		if (!strcmp(dv->dv_cfdata->cf_driver->cd_name,"fd")) {
   2707 			continue;
   2708 		}
   2709 
   2710 		/* need to find the device_name_to_block_device_major stuff */
   2711 		cd_name = dv->dv_cfdata->cf_driver->cd_name;
   2712 		dtobdm = dev_name2blk;
   2713 		while (dtobdm->d_name && strcmp(dtobdm->d_name, cd_name)) {
   2714 			dtobdm++;
   2715 		}
   2716 
   2717 		/* get a vnode for the raw partition of this disk */
   2718 
   2719 		dev = MAKEDISKDEV(dtobdm->d_maj, dv->dv_unit, RAW_PART);
   2720 		if (bdevvp(dev, &vp))
   2721 			panic("RAID can't alloc vnode");
   2722 
   2723 		error = VOP_OPEN(vp, FREAD, NOCRED, 0);
   2724 
   2725 		if (error) {
   2726 			/* "Who cares."  Continue looking
   2727 			   for something that exists*/
   2728 			vput(vp);
   2729 			continue;
   2730 		}
   2731 
   2732 		/* Ok, the disk exists.  Go get the disklabel. */
   2733 		error = VOP_IOCTL(vp, DIOCGDINFO, (caddr_t)&label,
   2734 				  FREAD, NOCRED, 0);
   2735 		if (error) {
   2736 			/*
   2737 			 * XXX can't happen - open() would
   2738 			 * have errored out (or faked up one)
   2739 			 */
   2740 			printf("can't get label for dev %s%c (%d)!?!?\n",
   2741 			       dv->dv_xname, 'a' + RAW_PART, error);
   2742 		}
   2743 
   2744 		/* don't need this any more.  We'll allocate it again
   2745 		   a little later if we really do... */
   2746 		VOP_CLOSE(vp, FREAD, NOCRED, 0);
   2747 		vput(vp);
   2748 
   2749 		for (i=0; i < label.d_npartitions; i++) {
   2750 			/* We only support partitions marked as RAID */
   2751 			if (label.d_partitions[i].p_fstype != FS_RAID)
   2752 				continue;
   2753 
   2754 			dev = MAKEDISKDEV(dtobdm->d_maj, dv->dv_unit, i);
   2755 			if (bdevvp(dev, &vp))
   2756 				panic("RAID can't alloc vnode");
   2757 
   2758 			error = VOP_OPEN(vp, FREAD, NOCRED, 0);
   2759 			if (error) {
   2760 				/* Whatever... */
   2761 				vput(vp);
   2762 				continue;
   2763 			}
   2764 
   2765 			good_one = 0;
   2766 
   2767 			clabel = (RF_ComponentLabel_t *)
   2768 				malloc(sizeof(RF_ComponentLabel_t),
   2769 				       M_RAIDFRAME, M_NOWAIT);
   2770 			if (clabel == NULL) {
   2771 				/* XXX CLEANUP HERE */
   2772 				printf("RAID auto config: out of memory!\n");
   2773 				return(NULL); /* XXX probably should panic? */
   2774 			}
   2775 
   2776 			if (!raidread_component_label(dev, vp, clabel)) {
   2777 				/* Got the label.  Does it look reasonable? */
   2778 				if (rf_reasonable_label(clabel) &&
   2779 				    (clabel->partitionSize <=
   2780 				     label.d_partitions[i].p_size)) {
   2781 #if DEBUG
   2782 					printf("Component on: %s%c: %d\n",
   2783 					       dv->dv_xname, 'a'+i,
   2784 					       label.d_partitions[i].p_size);
   2785 					rf_print_component_label(clabel);
   2786 #endif
   2787 					/* if it's reasonable, add it,
   2788 					   else ignore it. */
   2789 					ac = (RF_AutoConfig_t *)
   2790 						malloc(sizeof(RF_AutoConfig_t),
   2791 						       M_RAIDFRAME,
   2792 						       M_NOWAIT);
   2793 					if (ac == NULL) {
   2794 						/* XXX should panic?? */
   2795 						return(NULL);
   2796 					}
   2797 
   2798 					sprintf(ac->devname, "%s%c",
   2799 						dv->dv_xname, 'a'+i);
   2800 					ac->dev = dev;
   2801 					ac->vp = vp;
   2802 					ac->clabel = clabel;
   2803 					ac->next = ac_list;
   2804 					ac_list = ac;
   2805 					good_one = 1;
   2806 				}
   2807 			}
   2808 			if (!good_one) {
   2809 				/* cleanup */
   2810 				free(clabel, M_RAIDFRAME);
   2811 				VOP_CLOSE(vp, FREAD, NOCRED, 0);
   2812 				vput(vp);
   2813 			}
   2814 		}
   2815 	}
   2816 }
   2817 return(ac_list);
   2818 }
   2819 
   2820 static int
   2821 rf_reasonable_label(clabel)
   2822 	RF_ComponentLabel_t *clabel;
   2823 {
   2824 
   2825 	if (((clabel->version==RF_COMPONENT_LABEL_VERSION_1) ||
   2826 	     (clabel->version==RF_COMPONENT_LABEL_VERSION)) &&
   2827 	    ((clabel->clean == RF_RAID_CLEAN) ||
   2828 	     (clabel->clean == RF_RAID_DIRTY)) &&
   2829 	    clabel->row >=0 &&
   2830 	    clabel->column >= 0 &&
   2831 	    clabel->num_rows > 0 &&
   2832 	    clabel->num_columns > 0 &&
   2833 	    clabel->row < clabel->num_rows &&
   2834 	    clabel->column < clabel->num_columns &&
   2835 	    clabel->blockSize > 0 &&
   2836 	    clabel->numBlocks > 0) {
   2837 		/* label looks reasonable enough... */
   2838 		return(1);
   2839 	}
   2840 	return(0);
   2841 }
   2842 
   2843 
   2844 void
   2845 rf_print_component_label(clabel)
   2846 	RF_ComponentLabel_t *clabel;
   2847 {
   2848 	printf("   Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
   2849 	       clabel->row, clabel->column,
   2850 	       clabel->num_rows, clabel->num_columns);
   2851 	printf("   Version: %d Serial Number: %d Mod Counter: %d\n",
   2852 	       clabel->version, clabel->serial_number,
   2853 	       clabel->mod_counter);
   2854 	printf("   Clean: %s Status: %d\n",
   2855 	       clabel->clean ? "Yes" : "No", clabel->status );
   2856 	printf("   sectPerSU: %d SUsPerPU: %d SUsPerRU: %d\n",
   2857 	       clabel->sectPerSU, clabel->SUsPerPU, clabel->SUsPerRU);
   2858 	printf("   RAID Level: %c  blocksize: %d numBlocks: %d\n",
   2859 	       (char) clabel->parityConfig, clabel->blockSize,
   2860 	       clabel->numBlocks);
   2861 	printf("   Autoconfig: %s\n", clabel->autoconfigure ? "Yes" : "No" );
   2862 	printf("   Contains root partition: %s\n",
   2863 	       clabel->root_partition ? "Yes" : "No" );
   2864 	printf("   Last configured as: raid%d\n", clabel->last_unit );
   2865 #if 0
   2866 	   printf("   Config order: %d\n", clabel->config_order);
   2867 #endif
   2868 
   2869 }
   2870 
   2871 RF_ConfigSet_t *
   2872 rf_create_auto_sets(ac_list)
   2873 	RF_AutoConfig_t *ac_list;
   2874 {
   2875 	RF_AutoConfig_t *ac;
   2876 	RF_ConfigSet_t *config_sets;
   2877 	RF_ConfigSet_t *cset;
   2878 	RF_AutoConfig_t *ac_next;
   2879 
   2880 
   2881 	config_sets = NULL;
   2882 
   2883 	/* Go through the AutoConfig list, and figure out which components
   2884 	   belong to what sets.  */
   2885 	ac = ac_list;
   2886 	while(ac!=NULL) {
   2887 		/* we're going to putz with ac->next, so save it here
   2888 		   for use at the end of the loop */
   2889 		ac_next = ac->next;
   2890 
   2891 		if (config_sets == NULL) {
   2892 			/* will need at least this one... */
   2893 			config_sets = (RF_ConfigSet_t *)
   2894 				malloc(sizeof(RF_ConfigSet_t),
   2895 				       M_RAIDFRAME, M_NOWAIT);
   2896 			if (config_sets == NULL) {
   2897 				panic("rf_create_auto_sets: No memory!\n");
   2898 			}
   2899 			/* this one is easy :) */
   2900 			config_sets->ac = ac;
   2901 			config_sets->next = NULL;
   2902 			config_sets->rootable = 0;
   2903 			ac->next = NULL;
   2904 		} else {
   2905 			/* which set does this component fit into? */
   2906 			cset = config_sets;
   2907 			while(cset!=NULL) {
   2908 				if (rf_does_it_fit(cset, ac)) {
   2909 					/* looks like it matches... */
   2910 					ac->next = cset->ac;
   2911 					cset->ac = ac;
   2912 					break;
   2913 				}
   2914 				cset = cset->next;
   2915 			}
   2916 			if (cset==NULL) {
   2917 				/* didn't find a match above... new set..*/
   2918 				cset = (RF_ConfigSet_t *)
   2919 					malloc(sizeof(RF_ConfigSet_t),
   2920 					       M_RAIDFRAME, M_NOWAIT);
   2921 				if (cset == NULL) {
   2922 					panic("rf_create_auto_sets: No memory!\n");
   2923 				}
   2924 				cset->ac = ac;
   2925 				ac->next = NULL;
   2926 				cset->next = config_sets;
   2927 				cset->rootable = 0;
   2928 				config_sets = cset;
   2929 			}
   2930 		}
   2931 		ac = ac_next;
   2932 	}
   2933 
   2934 
   2935 	return(config_sets);
   2936 }
   2937 
   2938 static int
   2939 rf_does_it_fit(cset, ac)
   2940 	RF_ConfigSet_t *cset;
   2941 	RF_AutoConfig_t *ac;
   2942 {
   2943 	RF_ComponentLabel_t *clabel1, *clabel2;
   2944 
   2945 	/* If this one matches the *first* one in the set, that's good
   2946 	   enough, since the other members of the set would have been
   2947 	   through here too... */
   2948 	/* note that we are not checking partitionSize here..
   2949 
   2950 	   Note that we are also not checking the mod_counters here.
   2951 	   If everything else matches execpt the mod_counter, that's
   2952 	   good enough for this test.  We will deal with the mod_counters
   2953 	   a little later in the autoconfiguration process.
   2954 
   2955 	    (clabel1->mod_counter == clabel2->mod_counter) &&
   2956 
   2957 	   The reason we don't check for this is that failed disks
   2958 	   will have lower modification counts.  If those disks are
   2959 	   not added to the set they used to belong to, then they will
   2960 	   form their own set, which may result in 2 different sets,
   2961 	   for example, competing to be configured at raid0, and
   2962 	   perhaps competing to be the root filesystem set.  If the
   2963 	   wrong ones get configured, or both attempt to become /,
   2964 	   weird behaviour and or serious lossage will occur.  Thus we
   2965 	   need to bring them into the fold here, and kick them out at
   2966 	   a later point.
   2967 
   2968 	*/
   2969 
   2970 	clabel1 = cset->ac->clabel;
   2971 	clabel2 = ac->clabel;
   2972 	if ((clabel1->version == clabel2->version) &&
   2973 	    (clabel1->serial_number == clabel2->serial_number) &&
   2974 	    (clabel1->num_rows == clabel2->num_rows) &&
   2975 	    (clabel1->num_columns == clabel2->num_columns) &&
   2976 	    (clabel1->sectPerSU == clabel2->sectPerSU) &&
   2977 	    (clabel1->SUsPerPU == clabel2->SUsPerPU) &&
   2978 	    (clabel1->SUsPerRU == clabel2->SUsPerRU) &&
   2979 	    (clabel1->parityConfig == clabel2->parityConfig) &&
   2980 	    (clabel1->maxOutstanding == clabel2->maxOutstanding) &&
   2981 	    (clabel1->blockSize == clabel2->blockSize) &&
   2982 	    (clabel1->numBlocks == clabel2->numBlocks) &&
   2983 	    (clabel1->autoconfigure == clabel2->autoconfigure) &&
   2984 	    (clabel1->root_partition == clabel2->root_partition) &&
   2985 	    (clabel1->last_unit == clabel2->last_unit) &&
   2986 	    (clabel1->config_order == clabel2->config_order)) {
   2987 		/* if it get's here, it almost *has* to be a match */
   2988 	} else {
   2989 		/* it's not consistent with somebody in the set..
   2990 		   punt */
   2991 		return(0);
   2992 	}
   2993 	/* all was fine.. it must fit... */
   2994 	return(1);
   2995 }
   2996 
   2997 int
   2998 rf_have_enough_components(cset)
   2999 	RF_ConfigSet_t *cset;
   3000 {
   3001 	RF_AutoConfig_t *ac;
   3002 	RF_AutoConfig_t *auto_config;
   3003 	RF_ComponentLabel_t *clabel;
   3004 	int r,c;
   3005 	int num_rows;
   3006 	int num_cols;
   3007 	int num_missing;
   3008 	int mod_counter;
   3009 	int mod_counter_found;
   3010 	int even_pair_failed;
   3011 	char parity_type;
   3012 
   3013 
   3014 	/* check to see that we have enough 'live' components
   3015 	   of this set.  If so, we can configure it if necessary */
   3016 
   3017 	num_rows = cset->ac->clabel->num_rows;
   3018 	num_cols = cset->ac->clabel->num_columns;
   3019 	parity_type = cset->ac->clabel->parityConfig;
   3020 
   3021 	/* XXX Check for duplicate components!?!?!? */
   3022 
   3023 	/* Determine what the mod_counter is supposed to be for this set. */
   3024 
   3025 	mod_counter_found = 0;
   3026 	ac = cset->ac;
   3027 	while(ac!=NULL) {
   3028 		if (mod_counter_found==0) {
   3029 			mod_counter = ac->clabel->mod_counter;
   3030 			mod_counter_found = 1;
   3031 		} else {
   3032 			if (ac->clabel->mod_counter > mod_counter) {
   3033 				mod_counter = ac->clabel->mod_counter;
   3034 			}
   3035 		}
   3036 		ac = ac->next;
   3037 	}
   3038 
   3039 	num_missing = 0;
   3040 	auto_config = cset->ac;
   3041 
   3042 	for(r=0; r<num_rows; r++) {
   3043 		even_pair_failed = 0;
   3044 		for(c=0; c<num_cols; c++) {
   3045 			ac = auto_config;
   3046 			while(ac!=NULL) {
   3047 				if ((ac->clabel->row == r) &&
   3048 				    (ac->clabel->column == c) &&
   3049 				    (ac->clabel->mod_counter == mod_counter)) {
   3050 					/* it's this one... */
   3051 #if DEBUG
   3052 					printf("Found: %s at %d,%d\n",
   3053 					       ac->devname,r,c);
   3054 #endif
   3055 					break;
   3056 				}
   3057 				ac=ac->next;
   3058 			}
   3059 			if (ac==NULL) {
   3060 				/* Didn't find one here! */
   3061 				/* special case for RAID 1, especially
   3062 				   where there are more than 2
   3063 				   components (where RAIDframe treats
   3064 				   things a little differently :( ) */
   3065 				if (parity_type == '1') {
   3066 					if (c%2 == 0) { /* even component */
   3067 						even_pair_failed = 1;
   3068 					} else { /* odd component.  If
   3069                                                     we're failed, and
   3070                                                     so is the even
   3071                                                     component, it's
   3072                                                     "Good Night, Charlie" */
   3073 						if (even_pair_failed == 1) {
   3074 							return(0);
   3075 						}
   3076 					}
   3077 				} else {
   3078 					/* normal accounting */
   3079 					num_missing++;
   3080 				}
   3081 			}
   3082 			if ((parity_type == '1') && (c%2 == 1)) {
   3083 				/* Just did an even component, and we didn't
   3084 				   bail.. reset the even_pair_failed flag,
   3085 				   and go on to the next component.... */
   3086 				even_pair_failed = 0;
   3087 			}
   3088 		}
   3089 	}
   3090 
   3091 	clabel = cset->ac->clabel;
   3092 
   3093 	if (((clabel->parityConfig == '0') && (num_missing > 0)) ||
   3094 	    ((clabel->parityConfig == '4') && (num_missing > 1)) ||
   3095 	    ((clabel->parityConfig == '5') && (num_missing > 1))) {
   3096 		/* XXX this needs to be made *much* more general */
   3097 		/* Too many failures */
   3098 		return(0);
   3099 	}
   3100 	/* otherwise, all is well, and we've got enough to take a kick
   3101 	   at autoconfiguring this set */
   3102 	return(1);
   3103 }
   3104 
   3105 void
   3106 rf_create_configuration(ac,config,raidPtr)
   3107 	RF_AutoConfig_t *ac;
   3108 	RF_Config_t *config;
   3109 	RF_Raid_t *raidPtr;
   3110 {
   3111 	RF_ComponentLabel_t *clabel;
   3112 	int i;
   3113 
   3114 	clabel = ac->clabel;
   3115 
   3116 	/* 1. Fill in the common stuff */
   3117 	config->numRow = clabel->num_rows;
   3118 	config->numCol = clabel->num_columns;
   3119 	config->numSpare = 0; /* XXX should this be set here? */
   3120 	config->sectPerSU = clabel->sectPerSU;
   3121 	config->SUsPerPU = clabel->SUsPerPU;
   3122 	config->SUsPerRU = clabel->SUsPerRU;
   3123 	config->parityConfig = clabel->parityConfig;
   3124 	/* XXX... */
   3125 	strcpy(config->diskQueueType,"fifo");
   3126 	config->maxOutstandingDiskReqs = clabel->maxOutstanding;
   3127 	config->layoutSpecificSize = 0; /* XXX ?? */
   3128 
   3129 	while(ac!=NULL) {
   3130 		/* row/col values will be in range due to the checks
   3131 		   in reasonable_label() */
   3132 		strcpy(config->devnames[ac->clabel->row][ac->clabel->column],
   3133 		       ac->devname);
   3134 		ac = ac->next;
   3135 	}
   3136 
   3137 	for(i=0;i<RF_MAXDBGV;i++) {
   3138 		config->debugVars[i][0] = NULL;
   3139 	}
   3140 }
   3141 
   3142 int
   3143 rf_set_autoconfig(raidPtr, new_value)
   3144 	RF_Raid_t *raidPtr;
   3145 	int new_value;
   3146 {
   3147 	RF_ComponentLabel_t clabel;
   3148 	struct vnode *vp;
   3149 	dev_t dev;
   3150 	int row, column;
   3151 
   3152 	raidPtr->autoconfigure = new_value;
   3153 	for(row=0; row<raidPtr->numRow; row++) {
   3154 		for(column=0; column<raidPtr->numCol; column++) {
   3155 			if (raidPtr->Disks[row][column].status ==
   3156 			    rf_ds_optimal) {
   3157 				dev = raidPtr->Disks[row][column].dev;
   3158 				vp = raidPtr->raid_cinfo[row][column].ci_vp;
   3159 				raidread_component_label(dev, vp, &clabel);
   3160 				clabel.autoconfigure = new_value;
   3161 				raidwrite_component_label(dev, vp, &clabel);
   3162 			}
   3163 		}
   3164 	}
   3165 	return(new_value);
   3166 }
   3167 
   3168 int
   3169 rf_set_rootpartition(raidPtr, new_value)
   3170 	RF_Raid_t *raidPtr;
   3171 	int new_value;
   3172 {
   3173 	RF_ComponentLabel_t clabel;
   3174 	struct vnode *vp;
   3175 	dev_t dev;
   3176 	int row, column;
   3177 
   3178 	raidPtr->root_partition = new_value;
   3179 	for(row=0; row<raidPtr->numRow; row++) {
   3180 		for(column=0; column<raidPtr->numCol; column++) {
   3181 			if (raidPtr->Disks[row][column].status ==
   3182 			    rf_ds_optimal) {
   3183 				dev = raidPtr->Disks[row][column].dev;
   3184 				vp = raidPtr->raid_cinfo[row][column].ci_vp;
   3185 				raidread_component_label(dev, vp, &clabel);
   3186 				clabel.root_partition = new_value;
   3187 				raidwrite_component_label(dev, vp, &clabel);
   3188 			}
   3189 		}
   3190 	}
   3191 	return(new_value);
   3192 }
   3193 
   3194 void
   3195 rf_release_all_vps(cset)
   3196 	RF_ConfigSet_t *cset;
   3197 {
   3198 	RF_AutoConfig_t *ac;
   3199 
   3200 	ac = cset->ac;
   3201 	while(ac!=NULL) {
   3202 		/* Close the vp, and give it back */
   3203 		if (ac->vp) {
   3204 			VOP_CLOSE(ac->vp, FREAD, NOCRED, 0);
   3205 			vput(ac->vp);
   3206 			ac->vp = NULL;
   3207 		}
   3208 		ac = ac->next;
   3209 	}
   3210 }
   3211 
   3212 
   3213 void
   3214 rf_cleanup_config_set(cset)
   3215 	RF_ConfigSet_t *cset;
   3216 {
   3217 	RF_AutoConfig_t *ac;
   3218 	RF_AutoConfig_t *next_ac;
   3219 
   3220 	ac = cset->ac;
   3221 	while(ac!=NULL) {
   3222 		next_ac = ac->next;
   3223 		/* nuke the label */
   3224 		free(ac->clabel, M_RAIDFRAME);
   3225 		/* cleanup the config structure */
   3226 		free(ac, M_RAIDFRAME);
   3227 		/* "next.." */
   3228 		ac = next_ac;
   3229 	}
   3230 	/* and, finally, nuke the config set */
   3231 	free(cset, M_RAIDFRAME);
   3232 }
   3233 
   3234 
   3235 void
   3236 raid_init_component_label(raidPtr, clabel)
   3237 	RF_Raid_t *raidPtr;
   3238 	RF_ComponentLabel_t *clabel;
   3239 {
   3240 	/* current version number */
   3241 	clabel->version = RF_COMPONENT_LABEL_VERSION;
   3242 	clabel->serial_number = raidPtr->serial_number;
   3243 	clabel->mod_counter = raidPtr->mod_counter;
   3244 	clabel->num_rows = raidPtr->numRow;
   3245 	clabel->num_columns = raidPtr->numCol;
   3246 	clabel->clean = RF_RAID_DIRTY; /* not clean */
   3247 	clabel->status = rf_ds_optimal; /* "It's good!" */
   3248 
   3249 	clabel->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit;
   3250 	clabel->SUsPerPU = raidPtr->Layout.SUsPerPU;
   3251 	clabel->SUsPerRU = raidPtr->Layout.SUsPerRU;
   3252 
   3253 	clabel->blockSize = raidPtr->bytesPerSector;
   3254 	clabel->numBlocks = raidPtr->sectorsPerDisk;
   3255 
   3256 	/* XXX not portable */
   3257 	clabel->parityConfig = raidPtr->Layout.map->parityConfig;
   3258 	clabel->maxOutstanding = raidPtr->maxOutstanding;
   3259 	clabel->autoconfigure = raidPtr->autoconfigure;
   3260 	clabel->root_partition = raidPtr->root_partition;
   3261 	clabel->last_unit = raidPtr->raidid;
   3262 	clabel->config_order = raidPtr->config_order;
   3263 }
   3264 
   3265 int
   3266 rf_auto_config_set(cset,unit)
   3267 	RF_ConfigSet_t *cset;
   3268 	int *unit;
   3269 {
   3270 	RF_Raid_t *raidPtr;
   3271 	RF_Config_t *config;
   3272 	int raidID;
   3273 	int retcode;
   3274 
   3275 	printf("RAID autoconfigure\n");
   3276 
   3277 	retcode = 0;
   3278 	*unit = -1;
   3279 
   3280 	/* 1. Create a config structure */
   3281 
   3282 	config = (RF_Config_t *)malloc(sizeof(RF_Config_t),
   3283 				       M_RAIDFRAME,
   3284 				       M_NOWAIT);
   3285 	if (config==NULL) {
   3286 		printf("Out of mem!?!?\n");
   3287 				/* XXX do something more intelligent here. */
   3288 		return(1);
   3289 	}
   3290 
   3291 	memset(config, 0, sizeof(RF_Config_t));
   3292 
   3293 	/* XXX raidID needs to be set correctly.. */
   3294 
   3295 	/*
   3296 	   2. Figure out what RAID ID this one is supposed to live at
   3297 	   See if we can get the same RAID dev that it was configured
   3298 	   on last time..
   3299 	*/
   3300 
   3301 	raidID = cset->ac->clabel->last_unit;
   3302 	if ((raidID < 0) || (raidID >= numraid)) {
   3303 		/* let's not wander off into lala land. */
   3304 		raidID = numraid - 1;
   3305 	}
   3306 	if (raidPtrs[raidID]->valid != 0) {
   3307 
   3308 		/*
   3309 		   Nope... Go looking for an alternative...
   3310 		   Start high so we don't immediately use raid0 if that's
   3311 		   not taken.
   3312 		*/
   3313 
   3314 		for(raidID = numraid; raidID >= 0; raidID--) {
   3315 			if (raidPtrs[raidID]->valid == 0) {
   3316 				/* can use this one! */
   3317 				break;
   3318 			}
   3319 		}
   3320 	}
   3321 
   3322 	if (raidID < 0) {
   3323 		/* punt... */
   3324 		printf("Unable to auto configure this set!\n");
   3325 		printf("(Out of RAID devs!)\n");
   3326 		return(1);
   3327 	}
   3328 	printf("Configuring raid%d:\n",raidID);
   3329 	raidPtr = raidPtrs[raidID];
   3330 
   3331 	/* XXX all this stuff should be done SOMEWHERE ELSE! */
   3332 	raidPtr->raidid = raidID;
   3333 	raidPtr->openings = RAIDOUTSTANDING;
   3334 
   3335 	/* 3. Build the configuration structure */
   3336 	rf_create_configuration(cset->ac, config, raidPtr);
   3337 
   3338 	/* 4. Do the configuration */
   3339 	retcode = rf_Configure(raidPtr, config, cset->ac);
   3340 
   3341 	if (retcode == 0) {
   3342 
   3343 		raidinit(raidPtrs[raidID]);
   3344 
   3345 		rf_markalldirty(raidPtrs[raidID]);
   3346 		raidPtrs[raidID]->autoconfigure = 1; /* XXX do this here? */
   3347 		if (cset->ac->clabel->root_partition==1) {
   3348 			/* everything configured just fine.  Make a note
   3349 			   that this set is eligible to be root. */
   3350 			cset->rootable = 1;
   3351 			/* XXX do this here? */
   3352 			raidPtrs[raidID]->root_partition = 1;
   3353 		}
   3354 	}
   3355 
   3356 	/* 5. Cleanup */
   3357 	free(config, M_RAIDFRAME);
   3358 
   3359 	*unit = raidID;
   3360 	return(retcode);
   3361 }
   3362