Home | History | Annotate | Line # | Download | only in raidframe
rf_netbsdkintf.c revision 1.145
      1 /*	$NetBSD: rf_netbsdkintf.c,v 1.145 2002/11/01 11:31:59 mrg Exp $	*/
      2 /*-
      3  * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc.
      4  * All rights reserved.
      5  *
      6  * This code is derived from software contributed to The NetBSD Foundation
      7  * by Greg Oster; Jason R. Thorpe.
      8  *
      9  * Redistribution and use in source and binary forms, with or without
     10  * modification, are permitted provided that the following conditions
     11  * are met:
     12  * 1. Redistributions of source code must retain the above copyright
     13  *    notice, this list of conditions and the following disclaimer.
     14  * 2. Redistributions in binary form must reproduce the above copyright
     15  *    notice, this list of conditions and the following disclaimer in the
     16  *    documentation and/or other materials provided with the distribution.
     17  * 3. All advertising materials mentioning features or use of this software
     18  *    must display the following acknowledgement:
     19  *        This product includes software developed by the NetBSD
     20  *        Foundation, Inc. and its contributors.
     21  * 4. Neither the name of The NetBSD Foundation nor the names of its
     22  *    contributors may be used to endorse or promote products derived
     23  *    from this software without specific prior written permission.
     24  *
     25  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     26  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     27  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     28  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     29  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     30  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     31  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     32  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     33  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     34  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     35  * POSSIBILITY OF SUCH DAMAGE.
     36  */
     37 
     38 /*
     39  * Copyright (c) 1988 University of Utah.
     40  * Copyright (c) 1990, 1993
     41  *      The Regents of the University of California.  All rights reserved.
     42  *
     43  * This code is derived from software contributed to Berkeley by
     44  * the Systems Programming Group of the University of Utah Computer
     45  * Science Department.
     46  *
     47  * Redistribution and use in source and binary forms, with or without
     48  * modification, are permitted provided that the following conditions
     49  * are met:
     50  * 1. Redistributions of source code must retain the above copyright
     51  *    notice, this list of conditions and the following disclaimer.
     52  * 2. Redistributions in binary form must reproduce the above copyright
     53  *    notice, this list of conditions and the following disclaimer in the
     54  *    documentation and/or other materials provided with the distribution.
     55  * 3. All advertising materials mentioning features or use of this software
     56  *    must display the following acknowledgement:
     57  *      This product includes software developed by the University of
     58  *      California, Berkeley and its contributors.
     59  * 4. Neither the name of the University nor the names of its contributors
     60  *    may be used to endorse or promote products derived from this software
     61  *    without specific prior written permission.
     62  *
     63  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     64  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     65  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     66  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     67  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     68  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     69  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     70  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     71  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     72  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     73  * SUCH DAMAGE.
     74  *
     75  * from: Utah $Hdr: cd.c 1.6 90/11/28$
     76  *
     77  *      @(#)cd.c        8.2 (Berkeley) 11/16/93
     78  */
     79 
     80 
     81 
     82 
     83 /*
     84  * Copyright (c) 1995 Carnegie-Mellon University.
     85  * All rights reserved.
     86  *
     87  * Authors: Mark Holland, Jim Zelenka
     88  *
     89  * Permission to use, copy, modify and distribute this software and
     90  * its documentation is hereby granted, provided that both the copyright
     91  * notice and this permission notice appear in all copies of the
     92  * software, derivative works or modified versions, and any portions
     93  * thereof, and that both notices appear in supporting documentation.
     94  *
     95  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
     96  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
     97  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
     98  *
     99  * Carnegie Mellon requests users of this software to return to
    100  *
    101  *  Software Distribution Coordinator  or  Software.Distribution (at) CS.CMU.EDU
    102  *  School of Computer Science
    103  *  Carnegie Mellon University
    104  *  Pittsburgh PA 15213-3890
    105  *
    106  * any improvements or extensions that they make and grant Carnegie the
    107  * rights to redistribute these changes.
    108  */
    109 
    110 /***********************************************************
    111  *
    112  * rf_kintf.c -- the kernel interface routines for RAIDframe
    113  *
    114  ***********************************************************/
    115 
    116 #include <sys/cdefs.h>
    117 __KERNEL_RCSID(0, "$NetBSD: rf_netbsdkintf.c,v 1.145 2002/11/01 11:31:59 mrg Exp $");
    118 
    119 #include <sys/param.h>
    120 #include <sys/errno.h>
    121 #include <sys/pool.h>
    122 #include <sys/queue.h>
    123 #include <sys/disk.h>
    124 #include <sys/device.h>
    125 #include <sys/stat.h>
    126 #include <sys/ioctl.h>
    127 #include <sys/fcntl.h>
    128 #include <sys/systm.h>
    129 #include <sys/namei.h>
    130 #include <sys/vnode.h>
    131 #include <sys/disklabel.h>
    132 #include <sys/conf.h>
    133 #include <sys/lock.h>
    134 #include <sys/buf.h>
    135 #include <sys/user.h>
    136 #include <sys/reboot.h>
    137 
    138 #include <dev/raidframe/raidframevar.h>
    139 #include <dev/raidframe/raidframeio.h>
    140 #include "raid.h"
    141 #include "opt_raid_autoconfig.h"
    142 #include "rf_raid.h"
    143 #include "rf_copyback.h"
    144 #include "rf_dag.h"
    145 #include "rf_dagflags.h"
    146 #include "rf_desc.h"
    147 #include "rf_diskqueue.h"
    148 #include "rf_etimer.h"
    149 #include "rf_general.h"
    150 #include "rf_kintf.h"
    151 #include "rf_options.h"
    152 #include "rf_driver.h"
    153 #include "rf_parityscan.h"
    154 #include "rf_threadstuff.h"
    155 
/*
 * db1_printf((fmt, ...)) -- level-1 debug trace.
 *
 * The single macro argument is a complete, parenthesised printf argument
 * list.  When DEBUG is defined the message is printed if rf_kdebug_level
 * is greater than zero; otherwise the macro does nothing.
 *
 * Both forms are wrapped in do { } while (0) so that the macro always
 * behaves as exactly one statement: the DEBUG form cannot capture an
 * "else" that follows it, and "if (x) db1_printf((...)); else ..." is
 * valid in either configuration.  Callers must supply the trailing ';'.
 */
#ifdef DEBUG
int     rf_kdebug_level = 0;
#define db1_printf(a) do { if (rf_kdebug_level > 0) printf a; } while (0)
#else				/* DEBUG */
#define db1_printf(a) do { } while (0)
#endif				/* DEBUG */
    162 
    163 static RF_Raid_t **raidPtrs;	/* per-unit RAID descriptors; the array is allocated in raidattach() and indexed by unit number */
    164 
    165 RF_DECLARE_STATIC_MUTEX(rf_sparet_wait_mutex)	/* set up with rf_mutex_init() in raidattach() */
    166 
    167 static RF_SparetWait_t *rf_sparet_wait_queue;	/* requests to install a
    168 						 * spare table; reset to NULL in raidattach() */
    169 static RF_SparetWait_t *rf_sparet_resp_queue;	/* responses from the
    170 						 * installation process; reset to NULL in raidattach() */
    171 
    172 /* prototypes for file-local helpers and for the attach entry point */
    173 static void KernelWakeupFunc(struct buf * bp);
    174 static void InitBP(struct buf * bp, struct vnode *, unsigned rw_flag,
    175 		   dev_t dev, RF_SectorNum_t startSect,
    176 		   RF_SectorCount_t numSect, caddr_t buf,
    177 		   void (*cbFunc) (struct buf *), void *cbArg,
    178 		   int logBytesPerSector, struct proc * b_proc);
    179 static void raidinit(RF_Raid_t *);	/* called by the RAIDFRAME_CONFIGURE ioctl once rf_Configure() has succeeded */
    180 
    181 void raidattach(int);	/* the argument is the number of raid units to set up */
    182 
    183 dev_type_open(raidopen);	/* declarations of the entry points listed in the two switch tables below */
    184 dev_type_close(raidclose);
    185 dev_type_read(raidread);
    186 dev_type_write(raidwrite);
    187 dev_type_ioctl(raidioctl);
    188 dev_type_strategy(raidstrategy);
    189 dev_type_dump(raiddump);
    190 dev_type_size(raidsize);
    191 
    192 const struct bdevsw raid_bdevsw = {	/* entry points exported through struct bdevsw */
    193 	raidopen, raidclose, raidstrategy, raidioctl,
    194 	raiddump, raidsize, D_DISK
    195 };
    196 
    197 const struct cdevsw raid_cdevsw = {	/* entry points exported through struct cdevsw */
    198 	raidopen, raidclose, raidread, raidwrite, raidioctl,
    199 	nostop, notty, nopoll, nommap, nokqfilter, D_DISK	/* the stop/tty/poll/mmap/kqfilter slots use the no* stubs */
    200 };
    201 
    202 /*
    203  * Per-component I/O buffer wrapper.  Pilfered from ccd.c
    204  */
    205 
    206 struct raidbuf {
    207 	struct buf rf_buf;	/* new I/O buf.  MUST BE FIRST!!! (presumably so a struct buf * can be cast back to struct raidbuf * -- confirm against the users of this struct) */
    208 	struct buf *rf_obp;	/* ptr. to original I/O buf */
    209 	RF_DiskQueueData_t *req;/* the request that this was part of.. */
    210 };
    211 
    212 /* component buffer pool: items are sizeof(struct raidbuf); set up by pool_init() in raidattach() */
    213 struct pool raidframe_cbufpool;
    214 
    215 /* XXX Not sure if the following should be replacing the raidPtrs above,
    216    or if it should be used in conjunction with that...
    217    As written, both arrays are allocated in raidattach() and indexed by the same unit number. */
    218 
    219 struct raid_softc {
    220 	int     sc_flags;	/* RAIDF_* state bits, defined below */
    221 	int     sc_cflags;	/* configuration flags */
    222 	size_t  sc_size;        /* size of the raid device */
    223 	char    sc_xname[20];	/* XXX external name */
    224 	struct disk sc_dkdev;	/* generic disk device info; its label and open masks are used by raidopen/raidclose/raidsize */
    225 	struct bufq_state buf_queue;	/* used for the device queue; raidstrategy() adds bufs with BUFQ_PUT */
    226 };
    227 /* sc_flags */
    228 #define RAIDF_INITED	0x01	/* unit has been initialized */
    229 #define RAIDF_WLABEL	0x02	/* label area is writable */
    230 #define RAIDF_LABELLING	0x04	/* unit is currently being labelled */
    231 #define RAIDF_WANTED	0x40	/* someone is waiting to obtain a lock */
    232 #define RAIDF_LOCKED	0x80	/* unit is locked */
    233 
    234 #define	raidunit(x)	DISKUNIT(x)	/* unit number taken from a dev_t */
    235 int numraid = 0;	/* number of usable units; set in raidattach(), entry points compare the unit against it */
    236 
    237 /*
    238  * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
    239  * Be aware that large numbers can allow the driver to consume a lot of
    240  * kernel memory, especially on writes, and in degraded mode reads.
    241  *
    242  * For example: with a stripe width of 64 blocks (32k) and 5 disks,
    243  * a single 64K write will typically require 64K for the old data,
    244  * 64K for the old parity, and 64K for the new parity, for a total
    245  * of 192K (if the parity buffer is not re-used immediately).
    246  * Even if it is used immediately, that's still 128K, which when multiplied
    247  * by say 10 requests, is 1280K, *on top* of the 640K of incoming data.
    248  *
    249  * Now in degraded mode, for example, a 64K read on the above setup may
    250  * require data reconstruction, which will require *all* of the 4 remaining
    251  * disks to participate -- 4 * 32K/disk == 128K again.
    252  */
    253 
    254 #ifndef RAIDOUTSTANDING
    255 #define RAIDOUTSTANDING   6	/* default; copied into raidPtr->openings when a set is configured via RAIDFRAME_CONFIGURE */
    256 #endif
    257 
    258 #define RAIDLABELDEV(dev)	\
    259 	(MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))	/* same major and unit as dev, but the RAW_PART partition */
    260 
    261 /* declared here, and made public, for the benefit of KVM stuff..
    261    Array with one entry per unit, allocated and zeroed in raidattach(). */
    262 struct raid_softc *raid_softc;
    263 
    264 static void raidgetdefaultlabel(RF_Raid_t *, struct raid_softc *,
    265 				     struct disklabel *);
    266 static void raidgetdisklabel(dev_t);	/* called by raidopen() on the first open of an initialized unit */
    267 static void raidmakedisklabel(struct raid_softc *);
    268 
    269 static int raidlock(struct raid_softc *);	/* non-zero return is passed back as an error by raidopen()/raidclose() */
    270 static void raidunlock(struct raid_softc *);
    271 
    272 static void rf_markalldirty(RF_Raid_t *);
    273 
    274 struct device *raidrootdev;	/* one struct device per unit, filled in by raidattach(); rf_buildroothack() may point booted_device at an entry */
    275 
    276 void rf_ReconThread(struct rf_recon_req *);
    277 /* XXX what I want is: */
    278 /*void rf_ReconThread(RF_Raid_t *raidPtr);  */
    279 void rf_RewriteParityThread(RF_Raid_t *raidPtr);
    280 void rf_CopybackThread(RF_Raid_t *raidPtr);
    281 void rf_ReconstructInPlaceThread(struct rf_recon_req *);
    282 int rf_autoconfig(struct device *self);	/* registered as a config finalizer by raidattach() */
    283 void rf_buildroothack(RF_ConfigSet_t *);
    284 
    285 RF_AutoConfig_t *rf_find_raid_components(void);
    286 RF_ConfigSet_t *rf_create_auto_sets(RF_AutoConfig_t *);
    287 static int rf_does_it_fit(RF_ConfigSet_t *,RF_AutoConfig_t *);
    288 static int rf_reasonable_label(RF_ComponentLabel_t *);
    289 void rf_create_configuration(RF_AutoConfig_t *,RF_Config_t *, RF_Raid_t *);
    290 int rf_set_autoconfig(RF_Raid_t *, int);
    291 int rf_set_rootpartition(RF_Raid_t *, int);
    292 void rf_release_all_vps(RF_ConfigSet_t *);
    293 void rf_cleanup_config_set(RF_ConfigSet_t *);
    294 int rf_have_enough_components(RF_ConfigSet_t *);
    295 int rf_auto_config_set(RF_ConfigSet_t *, int *);	/* rf_buildroothack() treats 0 as success and passes an int * to receive the unit */
    296 
    297 static int raidautoconfig = 0; /* Debugging, mostly.  Set to 0 to not
    298 				  allow autoconfig to take place.
    299 			          Note that this is overridden by having
    300 			          RAID_AUTOCONFIG as an option in the
    301 			          kernel config file: raidattach() then sets it to 1.  rf_autoconfig() clears it again so the scan runs only once.  */
    302 
    303 void
    304 raidattach(num)
    305 	int     num;
    306 {
    307 	int raidID;
    308 	int i, rc;
    309 
    310 #ifdef DEBUG
    311 	printf("raidattach: Asked for %d units\n", num);
    312 #endif
    313 
    314 	if (num <= 0) {
    315 #ifdef DIAGNOSTIC
    316 		panic("raidattach: count <= 0");
    317 #endif
    318 		return;
    319 	}
    320 	/* This is where all the initialization stuff gets done. */
    321 
    322 	numraid = num;
    323 
    324 	/* Make some space for requested number of units... */
    325 
    326 	RF_Calloc(raidPtrs, num, sizeof(RF_Raid_t *), (RF_Raid_t **));
    327 	if (raidPtrs == NULL) {
    328 		panic("raidPtrs is NULL!!");
    329 	}
    330 
    331 	/* Initialize the component buffer pool. */
    332 	pool_init(&raidframe_cbufpool, sizeof(struct raidbuf), 0,
    333 	    0, 0, "raidpl", NULL);
    334 
    335 	rc = rf_mutex_init(&rf_sparet_wait_mutex);
    336 	if (rc) {
    337 		RF_PANIC();
    338 	}
    339 
    340 	rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
    341 
    342 	for (i = 0; i < num; i++)
    343 		raidPtrs[i] = NULL;
    344 	rc = rf_BootRaidframe();
    345 	if (rc == 0)
    346 		printf("Kernelized RAIDframe activated\n");
    347 	else
    348 		panic("Serious error booting RAID!!");
    349 
    350 	/* put together some datastructures like the CCD device does.. This
    351 	 * lets us lock the device and what-not when it gets opened. */
    352 
    353 	raid_softc = (struct raid_softc *)
    354 		malloc(num * sizeof(struct raid_softc),
    355 		       M_RAIDFRAME, M_NOWAIT);
    356 	if (raid_softc == NULL) {
    357 		printf("WARNING: no memory for RAIDframe driver\n");
    358 		return;
    359 	}
    360 
    361 	memset(raid_softc, 0, num * sizeof(struct raid_softc));
    362 
    363 	raidrootdev = (struct device *)malloc(num * sizeof(struct device),
    364 					      M_RAIDFRAME, M_NOWAIT);
    365 	if (raidrootdev == NULL) {
    366 		panic("No memory for RAIDframe driver!!?!?!");
    367 	}
    368 
    369 	for (raidID = 0; raidID < num; raidID++) {
    370 		bufq_alloc(&raid_softc[raidID].buf_queue, BUFQ_FCFS);
    371 
    372 		raidrootdev[raidID].dv_class  = DV_DISK;
    373 		raidrootdev[raidID].dv_cfdata = NULL;
    374 		raidrootdev[raidID].dv_unit   = raidID;
    375 		raidrootdev[raidID].dv_parent = NULL;
    376 		raidrootdev[raidID].dv_flags  = 0;
    377 		sprintf(raidrootdev[raidID].dv_xname,"raid%d",raidID);
    378 
    379 		RF_Calloc(raidPtrs[raidID], 1, sizeof(RF_Raid_t),
    380 			  (RF_Raid_t *));
    381 		if (raidPtrs[raidID] == NULL) {
    382 			printf("WARNING: raidPtrs[%d] is NULL\n", raidID);
    383 			numraid = raidID;
    384 			return;
    385 		}
    386 	}
    387 
    388 #ifdef RAID_AUTOCONFIG
    389 	raidautoconfig = 1;
    390 #endif
    391 
    392 	/*
    393 	 * Register a finalizer which will be used to auto-config RAID
    394 	 * sets once all real hardware devices have been found.
    395 	 */
    396 	if (config_finalize_register(NULL, rf_autoconfig) != 0)
    397 		printf("WARNING: unable to register RAIDframe finalizer\n");
    398 }
    399 
    400 int
    401 rf_autoconfig(struct device *self)
    402 {
    403 	RF_AutoConfig_t *ac_list;
    404 	RF_ConfigSet_t *config_sets;
    405 
    406 	if (raidautoconfig == 0)
    407 		return (0);
    408 
    409 	/* XXX This code can only be run once. */
    410 	raidautoconfig = 0;
    411 
    412 	/* 1. locate all RAID components on the system */
    413 #ifdef DEBUG
    414 	printf("Searching for RAID components...\n");
    415 #endif
    416 	ac_list = rf_find_raid_components();
    417 
    418 	/* 2. Sort them into their respective sets. */
    419 	config_sets = rf_create_auto_sets(ac_list);
    420 
    421 	/*
    422 	 * 3. Evaluate each set andconfigure the valid ones.
    423 	 * This gets done in rf_buildroothack().
    424 	 */
    425 	rf_buildroothack(config_sets);
    426 
    427 	return (1);
    428 }
    429 
    430 void
    431 rf_buildroothack(RF_ConfigSet_t *config_sets)
    432 {
    433 	RF_ConfigSet_t *cset;
    434 	RF_ConfigSet_t *next_cset;
    435 	int retcode;
    436 	int raidID;
    437 	int rootID;
    438 	int num_root;
    439 
    440 	rootID = 0;
    441 	num_root = 0;
    442 	cset = config_sets;
    443 	while(cset != NULL ) {
    444 		next_cset = cset->next;
    445 		if (rf_have_enough_components(cset) &&
    446 		    cset->ac->clabel->autoconfigure==1) {
    447 			retcode = rf_auto_config_set(cset,&raidID);
    448 			if (!retcode) {
    449 				if (cset->rootable) {
    450 					rootID = raidID;
    451 					num_root++;
    452 				}
    453 			} else {
    454 				/* The autoconfig didn't work :( */
    455 #if DEBUG
    456 				printf("Autoconfig failed with code %d for raid%d\n", retcode, raidID);
    457 #endif
    458 				rf_release_all_vps(cset);
    459 			}
    460 		} else {
    461 			/* we're not autoconfiguring this set...
    462 			   release the associated resources */
    463 			rf_release_all_vps(cset);
    464 		}
    465 		/* cleanup */
    466 		rf_cleanup_config_set(cset);
    467 		cset = next_cset;
    468 	}
    469 
    470 	/* we found something bootable... */
    471 
    472 	if (num_root == 1) {
    473 		booted_device = &raidrootdev[rootID];
    474 	} else if (num_root > 1) {
    475 		/* we can't guess.. require the user to answer... */
    476 		boothowto |= RB_ASKNAME;
    477 	}
    478 }
    479 
    480 
    481 int
    482 raidsize(dev)
    483 	dev_t   dev;
    484 {
    485 	struct raid_softc *rs;
    486 	struct disklabel *lp;
    487 	int     part, unit, omask, size;
    488 
    489 	unit = raidunit(dev);
    490 	if (unit >= numraid)
    491 		return (-1);
    492 	rs = &raid_softc[unit];
    493 
    494 	if ((rs->sc_flags & RAIDF_INITED) == 0)
    495 		return (-1);
    496 
    497 	part = DISKPART(dev);
    498 	omask = rs->sc_dkdev.dk_openmask & (1 << part);
    499 	lp = rs->sc_dkdev.dk_label;
    500 
    501 	if (omask == 0 && raidopen(dev, 0, S_IFBLK, curproc))
    502 		return (-1);
    503 
    504 	if (lp->d_partitions[part].p_fstype != FS_SWAP)
    505 		size = -1;
    506 	else
    507 		size = lp->d_partitions[part].p_size *
    508 		    (lp->d_secsize / DEV_BSIZE);
    509 
    510 	if (omask == 0 && raidclose(dev, 0, S_IFBLK, curproc))
    511 		return (-1);
    512 
    513 	return (size);
    514 
    515 }
    516 
    517 int
    518 raiddump(dev, blkno, va, size)
    519 	dev_t   dev;
    520 	daddr_t blkno;
    521 	caddr_t va;
    522 	size_t  size;
    523 {
    524 	/* Not implemented. */
    525 	return ENXIO;
    526 }
    527 /* ARGSUSED */
    528 int
    529 raidopen(dev, flags, fmt, p)
    530 	dev_t   dev;
    531 	int     flags, fmt;
    532 	struct proc *p;
    533 {
    534 	int     unit = raidunit(dev);
    535 	struct raid_softc *rs;
    536 	struct disklabel *lp;
    537 	int     part, pmask;
    538 	int     error = 0;
    539 
    540 	if (unit >= numraid)
    541 		return (ENXIO);
    542 	rs = &raid_softc[unit];
    543 
    544 	if ((error = raidlock(rs)) != 0)
    545 		return (error);
    546 	lp = rs->sc_dkdev.dk_label;
    547 
    548 	part = DISKPART(dev);
    549 	pmask = (1 << part);
    550 
    551 	db1_printf(("Opening raid device number: %d partition: %d\n",
    552 		unit, part));
    553 
    554 
    555 	if ((rs->sc_flags & RAIDF_INITED) &&
    556 	    (rs->sc_dkdev.dk_openmask == 0))
    557 		raidgetdisklabel(dev);
    558 
    559 	/* make sure that this partition exists */
    560 
    561 	if (part != RAW_PART) {
    562 		db1_printf(("Not a raw partition..\n"));
    563 		if (((rs->sc_flags & RAIDF_INITED) == 0) ||
    564 		    ((part >= lp->d_npartitions) ||
    565 			(lp->d_partitions[part].p_fstype == FS_UNUSED))) {
    566 			error = ENXIO;
    567 			raidunlock(rs);
    568 			db1_printf(("Bailing out...\n"));
    569 			return (error);
    570 		}
    571 	}
    572 	/* Prevent this unit from being unconfigured while open. */
    573 	switch (fmt) {
    574 	case S_IFCHR:
    575 		rs->sc_dkdev.dk_copenmask |= pmask;
    576 		break;
    577 
    578 	case S_IFBLK:
    579 		rs->sc_dkdev.dk_bopenmask |= pmask;
    580 		break;
    581 	}
    582 
    583 	if ((rs->sc_dkdev.dk_openmask == 0) &&
    584 	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
    585 		/* First one... mark things as dirty... Note that we *MUST*
    586 		 have done a configure before this.  I DO NOT WANT TO BE
    587 		 SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
    588 		 THAT THEY BELONG TOGETHER!!!!! */
    589 		/* XXX should check to see if we're only open for reading
    590 		   here... If so, we needn't do this, but then need some
    591 		   other way of keeping track of what's happened.. */
    592 
    593 		rf_markalldirty( raidPtrs[unit] );
    594 	}
    595 
    596 
    597 	rs->sc_dkdev.dk_openmask =
    598 	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
    599 
    600 	raidunlock(rs);
    601 
    602 	return (error);
    603 
    604 
    605 }
    606 /* ARGSUSED */
    607 int
    608 raidclose(dev, flags, fmt, p)
    609 	dev_t   dev;
    610 	int     flags, fmt;
    611 	struct proc *p;
    612 {
    613 	int     unit = raidunit(dev);
    614 	struct raid_softc *rs;
    615 	int     error = 0;
    616 	int     part;
    617 
    618 	if (unit >= numraid)
    619 		return (ENXIO);
    620 	rs = &raid_softc[unit];
    621 
    622 	if ((error = raidlock(rs)) != 0)
    623 		return (error);
    624 
    625 	part = DISKPART(dev);
    626 
    627 	/* ...that much closer to allowing unconfiguration... */
    628 	switch (fmt) {
    629 	case S_IFCHR:
    630 		rs->sc_dkdev.dk_copenmask &= ~(1 << part);
    631 		break;
    632 
    633 	case S_IFBLK:
    634 		rs->sc_dkdev.dk_bopenmask &= ~(1 << part);
    635 		break;
    636 	}
    637 	rs->sc_dkdev.dk_openmask =
    638 	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
    639 
    640 	if ((rs->sc_dkdev.dk_openmask == 0) &&
    641 	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
    642 		/* Last one... device is not unconfigured yet.
    643 		   Device shutdown has taken care of setting the
    644 		   clean bits if RAIDF_INITED is not set
    645 		   mark things as clean... */
    646 #if 0
    647 		printf("Last one on raid%d.  Updating status.\n",unit);
    648 #endif
    649 		rf_update_component_labels(raidPtrs[unit],
    650 						 RF_FINAL_COMPONENT_UPDATE);
    651 		if (doing_shutdown) {
    652 			/* last one, and we're going down, so
    653 			   lights out for this RAID set too. */
    654 			error = rf_Shutdown(raidPtrs[unit]);
    655 
    656 			/* It's no longer initialized... */
    657 			rs->sc_flags &= ~RAIDF_INITED;
    658 
    659 			/* Detach the disk. */
    660 			disk_detach(&rs->sc_dkdev);
    661 		}
    662 	}
    663 
    664 	raidunlock(rs);
    665 	return (0);
    666 
    667 }
    668 
    669 void
    670 raidstrategy(bp)
    671 	struct buf *bp;
    672 {
    673 	int s;
    674 
    675 	unsigned int raidID = raidunit(bp->b_dev);
    676 	RF_Raid_t *raidPtr;
    677 	struct raid_softc *rs = &raid_softc[raidID];
    678 	struct disklabel *lp;
    679 	int     wlabel;
    680 
    681 	if ((rs->sc_flags & RAIDF_INITED) ==0) {
    682 		bp->b_error = ENXIO;
    683 		bp->b_flags |= B_ERROR;
    684 		bp->b_resid = bp->b_bcount;
    685 		biodone(bp);
    686 		return;
    687 	}
    688 	if (raidID >= numraid || !raidPtrs[raidID]) {
    689 		bp->b_error = ENODEV;
    690 		bp->b_flags |= B_ERROR;
    691 		bp->b_resid = bp->b_bcount;
    692 		biodone(bp);
    693 		return;
    694 	}
    695 	raidPtr = raidPtrs[raidID];
    696 	if (!raidPtr->valid) {
    697 		bp->b_error = ENODEV;
    698 		bp->b_flags |= B_ERROR;
    699 		bp->b_resid = bp->b_bcount;
    700 		biodone(bp);
    701 		return;
    702 	}
    703 	if (bp->b_bcount == 0) {
    704 		db1_printf(("b_bcount is zero..\n"));
    705 		biodone(bp);
    706 		return;
    707 	}
    708 	lp = rs->sc_dkdev.dk_label;
    709 
    710 	/*
    711 	 * Do bounds checking and adjust transfer.  If there's an
    712 	 * error, the bounds check will flag that for us.
    713 	 */
    714 
    715 	wlabel = rs->sc_flags & (RAIDF_WLABEL | RAIDF_LABELLING);
    716 	if (DISKPART(bp->b_dev) != RAW_PART)
    717 		if (bounds_check_with_label(bp, lp, wlabel) <= 0) {
    718 			db1_printf(("Bounds check failed!!:%d %d\n",
    719 				(int) bp->b_blkno, (int) wlabel));
    720 			biodone(bp);
    721 			return;
    722 		}
    723 	s = splbio();
    724 
    725 	bp->b_resid = 0;
    726 
    727 	/* stuff it onto our queue */
    728 	BUFQ_PUT(&rs->buf_queue, bp);
    729 
    730 	raidstart(raidPtrs[raidID]);
    731 
    732 	splx(s);
    733 }
    734 /* ARGSUSED */
    735 int
    736 raidread(dev, uio, flags)
    737 	dev_t   dev;
    738 	struct uio *uio;
    739 	int     flags;
    740 {
    741 	int     unit = raidunit(dev);
    742 	struct raid_softc *rs;
    743 	int     part;
    744 
    745 	if (unit >= numraid)
    746 		return (ENXIO);
    747 	rs = &raid_softc[unit];
    748 
    749 	if ((rs->sc_flags & RAIDF_INITED) == 0)
    750 		return (ENXIO);
    751 	part = DISKPART(dev);
    752 
    753 	db1_printf(("raidread: unit: %d partition: %d\n", unit, part));
    754 
    755 	return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));
    756 
    757 }
    758 /* ARGSUSED */
    759 int
    760 raidwrite(dev, uio, flags)
    761 	dev_t   dev;
    762 	struct uio *uio;
    763 	int     flags;
    764 {
    765 	int     unit = raidunit(dev);
    766 	struct raid_softc *rs;
    767 
    768 	if (unit >= numraid)
    769 		return (ENXIO);
    770 	rs = &raid_softc[unit];
    771 
    772 	if ((rs->sc_flags & RAIDF_INITED) == 0)
    773 		return (ENXIO);
    774 	db1_printf(("raidwrite\n"));
    775 	return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));
    776 
    777 }
    778 
    779 int
    780 raidioctl(dev, cmd, data, flag, p)
    781 	dev_t   dev;
    782 	u_long  cmd;
    783 	caddr_t data;
    784 	int     flag;
    785 	struct proc *p;
    786 {
    787 	int     unit = raidunit(dev);
    788 	int     error = 0;
    789 	int     part, pmask;
    790 	struct raid_softc *rs;
    791 	RF_Config_t *k_cfg, *u_cfg;
    792 	RF_Raid_t *raidPtr;
    793 	RF_RaidDisk_t *diskPtr;
    794 	RF_AccTotals_t *totals;
    795 	RF_DeviceConfig_t *d_cfg, **ucfgp;
    796 	u_char *specific_buf;
    797 	int retcode = 0;
    798 	int row;
    799 	int column;
    800 	int raidid;
    801 	struct rf_recon_req *rrcopy, *rr;
    802 	RF_ComponentLabel_t *clabel;
    803 	RF_ComponentLabel_t ci_label;
    804 	RF_ComponentLabel_t **clabel_ptr;
    805 	RF_SingleComponent_t *sparePtr,*componentPtr;
    806 	RF_SingleComponent_t hot_spare;
    807 	RF_SingleComponent_t component;
    808 	RF_ProgressInfo_t progressInfo, **progressInfoPtr;
    809 	int i, j, d;
    810 #ifdef __HAVE_OLD_DISKLABEL
    811 	struct disklabel newlabel;
    812 #endif
    813 
    814 	if (unit >= numraid)
    815 		return (ENXIO);
    816 	rs = &raid_softc[unit];
    817 	raidPtr = raidPtrs[unit];
    818 
    819 	db1_printf(("raidioctl: %d %d %d %d\n", (int) dev,
    820 		(int) DISKPART(dev), (int) unit, (int) cmd));
    821 
    822 	/* Must be open for writes for these commands... */
    823 	switch (cmd) {
    824 	case DIOCSDINFO:
    825 	case DIOCWDINFO:
    826 #ifdef __HAVE_OLD_DISKLABEL
    827 	case ODIOCWDINFO:
    828 	case ODIOCSDINFO:
    829 #endif
    830 	case DIOCWLABEL:
    831 		if ((flag & FWRITE) == 0)
    832 			return (EBADF);
    833 	}
    834 
    835 	/* Must be initialized for these... */
    836 	switch (cmd) {
    837 	case DIOCGDINFO:
    838 	case DIOCSDINFO:
    839 	case DIOCWDINFO:
    840 #ifdef __HAVE_OLD_DISKLABEL
    841 	case ODIOCGDINFO:
    842 	case ODIOCWDINFO:
    843 	case ODIOCSDINFO:
    844 	case ODIOCGDEFLABEL:
    845 #endif
    846 	case DIOCGPART:
    847 	case DIOCWLABEL:
    848 	case DIOCGDEFLABEL:
    849 	case RAIDFRAME_SHUTDOWN:
    850 	case RAIDFRAME_REWRITEPARITY:
    851 	case RAIDFRAME_GET_INFO:
    852 	case RAIDFRAME_RESET_ACCTOTALS:
    853 	case RAIDFRAME_GET_ACCTOTALS:
    854 	case RAIDFRAME_KEEP_ACCTOTALS:
    855 	case RAIDFRAME_GET_SIZE:
    856 	case RAIDFRAME_FAIL_DISK:
    857 	case RAIDFRAME_COPYBACK:
    858 	case RAIDFRAME_CHECK_RECON_STATUS:
    859 	case RAIDFRAME_CHECK_RECON_STATUS_EXT:
    860 	case RAIDFRAME_GET_COMPONENT_LABEL:
    861 	case RAIDFRAME_SET_COMPONENT_LABEL:
    862 	case RAIDFRAME_ADD_HOT_SPARE:
    863 	case RAIDFRAME_REMOVE_HOT_SPARE:
    864 	case RAIDFRAME_INIT_LABELS:
    865 	case RAIDFRAME_REBUILD_IN_PLACE:
    866 	case RAIDFRAME_CHECK_PARITY:
    867 	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
    868 	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
    869 	case RAIDFRAME_CHECK_COPYBACK_STATUS:
    870 	case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
    871 	case RAIDFRAME_SET_AUTOCONFIG:
    872 	case RAIDFRAME_SET_ROOT:
    873 	case RAIDFRAME_DELETE_COMPONENT:
    874 	case RAIDFRAME_INCORPORATE_HOT_SPARE:
    875 		if ((rs->sc_flags & RAIDF_INITED) == 0)
    876 			return (ENXIO);
    877 	}
    878 
    879 	switch (cmd) {
    880 
    881 		/* configure the system */
    882 	case RAIDFRAME_CONFIGURE:
    883 
    884 		if (raidPtr->valid) {
    885 			/* There is a valid RAID set running on this unit! */
    886 			printf("raid%d: Device already configured!\n",unit);
    887 			return(EINVAL);
    888 		}
    889 
    890 		/* copy-in the configuration information */
    891 		/* data points to a pointer to the configuration structure */
    892 
    893 		u_cfg = *((RF_Config_t **) data);
    894 		RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *));
    895 		if (k_cfg == NULL) {
    896 			return (ENOMEM);
    897 		}
    898 		retcode = copyin((caddr_t) u_cfg, (caddr_t) k_cfg,
    899 		    sizeof(RF_Config_t));
    900 		if (retcode) {
    901 			RF_Free(k_cfg, sizeof(RF_Config_t));
    902 			db1_printf(("rf_ioctl: retcode=%d copyin.1\n",
    903 				retcode));
    904 			return (retcode);
    905 		}
    906 		/* allocate a buffer for the layout-specific data, and copy it
    907 		 * in */
    908 		if (k_cfg->layoutSpecificSize) {
    909 			if (k_cfg->layoutSpecificSize > 10000) {
    910 				/* sanity check */
    911 				RF_Free(k_cfg, sizeof(RF_Config_t));
    912 				return (EINVAL);
    913 			}
    914 			RF_Malloc(specific_buf, k_cfg->layoutSpecificSize,
    915 			    (u_char *));
    916 			if (specific_buf == NULL) {
    917 				RF_Free(k_cfg, sizeof(RF_Config_t));
    918 				return (ENOMEM);
    919 			}
    920 			retcode = copyin(k_cfg->layoutSpecific,
    921 			    (caddr_t) specific_buf,
    922 			    k_cfg->layoutSpecificSize);
    923 			if (retcode) {
    924 				RF_Free(k_cfg, sizeof(RF_Config_t));
    925 				RF_Free(specific_buf,
    926 					k_cfg->layoutSpecificSize);
    927 				db1_printf(("rf_ioctl: retcode=%d copyin.2\n",
    928 					retcode));
    929 				return (retcode);
    930 			}
    931 		} else
    932 			specific_buf = NULL;
    933 		k_cfg->layoutSpecific = specific_buf;
    934 
    935 		/* should do some kind of sanity check on the configuration.
    936 		 * Store the sum of all the bytes in the last byte? */
    937 
    938 		/* configure the system */
    939 
    940 		/*
    941 		 * Clear the entire RAID descriptor, just to make sure
    942 		 *  there is no stale data left in the case of a
    943 		 *  reconfiguration
    944 		 */
    945 		memset((char *) raidPtr, 0, sizeof(RF_Raid_t));
    946 		raidPtr->raidid = unit;
    947 
    948 		retcode = rf_Configure(raidPtr, k_cfg, NULL);
    949 
    950 		if (retcode == 0) {
    951 
    952 			/* allow this many simultaneous IO's to
    953 			   this RAID device */
    954 			raidPtr->openings = RAIDOUTSTANDING;
    955 
    956 			raidinit(raidPtr);
    957 			rf_markalldirty(raidPtr);
    958 		}
    959 		/* free the buffers.  No return code here. */
    960 		if (k_cfg->layoutSpecificSize) {
    961 			RF_Free(specific_buf, k_cfg->layoutSpecificSize);
    962 		}
    963 		RF_Free(k_cfg, sizeof(RF_Config_t));
    964 
    965 		return (retcode);
    966 
    967 		/* shutdown the system */
    968 	case RAIDFRAME_SHUTDOWN:
    969 
    970 		if ((error = raidlock(rs)) != 0)
    971 			return (error);
    972 
    973 		/*
    974 		 * If somebody has a partition mounted, we shouldn't
    975 		 * shutdown.
    976 		 */
    977 
    978 		part = DISKPART(dev);
    979 		pmask = (1 << part);
    980 		if ((rs->sc_dkdev.dk_openmask & ~pmask) ||
    981 		    ((rs->sc_dkdev.dk_bopenmask & pmask) &&
    982 			(rs->sc_dkdev.dk_copenmask & pmask))) {
    983 			raidunlock(rs);
    984 			return (EBUSY);
    985 		}
    986 
    987 		retcode = rf_Shutdown(raidPtr);
    988 
    989 		/* It's no longer initialized... */
    990 		rs->sc_flags &= ~RAIDF_INITED;
    991 
    992 		/* Detach the disk. */
    993 		disk_detach(&rs->sc_dkdev);
    994 
    995 		raidunlock(rs);
    996 
    997 		return (retcode);
    998 	case RAIDFRAME_GET_COMPONENT_LABEL:
    999 		clabel_ptr = (RF_ComponentLabel_t **) data;
   1000 		/* need to read the component label for the disk indicated
   1001 		   by row,column in clabel */
   1002 
   1003 		/* For practice, let's get it directly fromdisk, rather
   1004 		   than from the in-core copy */
   1005 		RF_Malloc( clabel, sizeof( RF_ComponentLabel_t ),
   1006 			   (RF_ComponentLabel_t *));
   1007 		if (clabel == NULL)
   1008 			return (ENOMEM);
   1009 
   1010 		memset((char *) clabel, 0, sizeof(RF_ComponentLabel_t));
   1011 
   1012 		retcode = copyin( *clabel_ptr, clabel,
   1013 				  sizeof(RF_ComponentLabel_t));
   1014 
   1015 		if (retcode) {
   1016 			RF_Free( clabel, sizeof(RF_ComponentLabel_t));
   1017 			return(retcode);
   1018 		}
   1019 
   1020 		row = clabel->row;
   1021 		column = clabel->column;
   1022 
   1023 		if ((row < 0) || (row >= raidPtr->numRow) ||
   1024 		    (column < 0) || (column >= raidPtr->numCol +
   1025 				     raidPtr->numSpare)) {
   1026 			RF_Free( clabel, sizeof(RF_ComponentLabel_t));
   1027 			return(EINVAL);
   1028 		}
   1029 
   1030 		raidread_component_label(raidPtr->Disks[row][column].dev,
   1031 				raidPtr->raid_cinfo[row][column].ci_vp,
   1032 				clabel );
   1033 
   1034 		retcode = copyout((caddr_t) clabel,
   1035 				  (caddr_t) *clabel_ptr,
   1036 				  sizeof(RF_ComponentLabel_t));
   1037 		RF_Free( clabel, sizeof(RF_ComponentLabel_t));
   1038 		return (retcode);
   1039 
   1040 	case RAIDFRAME_SET_COMPONENT_LABEL:
   1041 		clabel = (RF_ComponentLabel_t *) data;
   1042 
   1043 		/* XXX check the label for valid stuff... */
   1044 		/* Note that some things *should not* get modified --
   1045 		   the user should be re-initing the labels instead of
   1046 		   trying to patch things.
   1047 		   */
   1048 
   1049 		raidid = raidPtr->raidid;
   1050 		printf("raid%d: Got component label:\n", raidid);
   1051 		printf("raid%d: Version: %d\n", raidid, clabel->version);
   1052 		printf("raid%d: Serial Number: %d\n", raidid, clabel->serial_number);
   1053 		printf("raid%d: Mod counter: %d\n", raidid, clabel->mod_counter);
   1054 		printf("raid%d: Row: %d\n", raidid, clabel->row);
   1055 		printf("raid%d: Column: %d\n", raidid, clabel->column);
   1056 		printf("raid%d: Num Rows: %d\n", raidid, clabel->num_rows);
   1057 		printf("raid%d: Num Columns: %d\n", raidid, clabel->num_columns);
   1058 		printf("raid%d: Clean: %d\n", raidid, clabel->clean);
   1059 		printf("raid%d: Status: %d\n", raidid, clabel->status);
   1060 
   1061 		row = clabel->row;
   1062 		column = clabel->column;
   1063 
   1064 		if ((row < 0) || (row >= raidPtr->numRow) ||
   1065 		    (column < 0) || (column >= raidPtr->numCol)) {
   1066 			return(EINVAL);
   1067 		}
   1068 
   1069 		/* XXX this isn't allowed to do anything for now :-) */
   1070 
   1071 		/* XXX and before it is, we need to fill in the rest
   1072 		   of the fields!?!?!?! */
   1073 #if 0
   1074 		raidwrite_component_label(
   1075                             raidPtr->Disks[row][column].dev,
   1076 			    raidPtr->raid_cinfo[row][column].ci_vp,
   1077 			    clabel );
   1078 #endif
   1079 		return (0);
   1080 
   1081 	case RAIDFRAME_INIT_LABELS:
   1082 		clabel = (RF_ComponentLabel_t *) data;
   1083 		/*
   1084 		   we only want the serial number from
   1085 		   the above.  We get all the rest of the information
   1086 		   from the config that was used to create this RAID
   1087 		   set.
   1088 		   */
   1089 
   1090 		raidPtr->serial_number = clabel->serial_number;
   1091 
   1092 		raid_init_component_label(raidPtr, &ci_label);
   1093 		ci_label.serial_number = clabel->serial_number;
   1094 
   1095 		for(row=0;row<raidPtr->numRow;row++) {
   1096 			ci_label.row = row;
   1097 			for(column=0;column<raidPtr->numCol;column++) {
   1098 				diskPtr = &raidPtr->Disks[row][column];
   1099 				if (!RF_DEAD_DISK(diskPtr->status)) {
   1100 					ci_label.partitionSize = diskPtr->partitionSize;
   1101 					ci_label.column = column;
   1102 					raidwrite_component_label(
   1103 					  raidPtr->Disks[row][column].dev,
   1104 					  raidPtr->raid_cinfo[row][column].ci_vp,
   1105 					  &ci_label );
   1106 				}
   1107 			}
   1108 		}
   1109 
   1110 		return (retcode);
   1111 	case RAIDFRAME_SET_AUTOCONFIG:
   1112 		d = rf_set_autoconfig(raidPtr, *(int *) data);
   1113 		printf("raid%d: New autoconfig value is: %d\n",
   1114 		       raidPtr->raidid, d);
   1115 		*(int *) data = d;
   1116 		return (retcode);
   1117 
   1118 	case RAIDFRAME_SET_ROOT:
   1119 		d = rf_set_rootpartition(raidPtr, *(int *) data);
   1120 		printf("raid%d: New rootpartition value is: %d\n",
   1121 		       raidPtr->raidid, d);
   1122 		*(int *) data = d;
   1123 		return (retcode);
   1124 
   1125 		/* initialize all parity */
   1126 	case RAIDFRAME_REWRITEPARITY:
   1127 
   1128 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1129 			/* Parity for RAID 0 is trivially correct */
   1130 			raidPtr->parity_good = RF_RAID_CLEAN;
   1131 			return(0);
   1132 		}
   1133 
   1134 		if (raidPtr->parity_rewrite_in_progress == 1) {
   1135 			/* Re-write is already in progress! */
   1136 			return(EINVAL);
   1137 		}
   1138 
   1139 		retcode = RF_CREATE_THREAD(raidPtr->parity_rewrite_thread,
   1140 					   rf_RewriteParityThread,
   1141 					   raidPtr,"raid_parity");
   1142 		return (retcode);
   1143 
   1144 
   1145 	case RAIDFRAME_ADD_HOT_SPARE:
   1146 		sparePtr = (RF_SingleComponent_t *) data;
   1147 		memcpy( &hot_spare, sparePtr, sizeof(RF_SingleComponent_t));
   1148 		retcode = rf_add_hot_spare(raidPtr, &hot_spare);
   1149 		return(retcode);
   1150 
   1151 	case RAIDFRAME_REMOVE_HOT_SPARE:
   1152 		return(retcode);
   1153 
   1154 	case RAIDFRAME_DELETE_COMPONENT:
   1155 		componentPtr = (RF_SingleComponent_t *)data;
   1156 		memcpy( &component, componentPtr,
   1157 			sizeof(RF_SingleComponent_t));
   1158 		retcode = rf_delete_component(raidPtr, &component);
   1159 		return(retcode);
   1160 
   1161 	case RAIDFRAME_INCORPORATE_HOT_SPARE:
   1162 		componentPtr = (RF_SingleComponent_t *)data;
   1163 		memcpy( &component, componentPtr,
   1164 			sizeof(RF_SingleComponent_t));
   1165 		retcode = rf_incorporate_hot_spare(raidPtr, &component);
   1166 		return(retcode);
   1167 
   1168 	case RAIDFRAME_REBUILD_IN_PLACE:
   1169 
   1170 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1171 			/* Can't do this on a RAID 0!! */
   1172 			return(EINVAL);
   1173 		}
   1174 
   1175 		if (raidPtr->recon_in_progress == 1) {
   1176 			/* a reconstruct is already in progress! */
   1177 			return(EINVAL);
   1178 		}
   1179 
   1180 		componentPtr = (RF_SingleComponent_t *) data;
   1181 		memcpy( &component, componentPtr,
   1182 			sizeof(RF_SingleComponent_t));
   1183 		row = component.row;
   1184 		column = component.column;
   1185 		printf("raid%d: Rebuild: %d %d\n", raidPtr->raidid,
   1186 		       row, column);
   1187 		if ((row < 0) || (row >= raidPtr->numRow) ||
   1188 		    (column < 0) || (column >= raidPtr->numCol)) {
   1189 			return(EINVAL);
   1190 		}
   1191 
   1192 		RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
   1193 		if (rrcopy == NULL)
   1194 			return(ENOMEM);
   1195 
   1196 		rrcopy->raidPtr = (void *) raidPtr;
   1197 		rrcopy->row = row;
   1198 		rrcopy->col = column;
   1199 
   1200 		retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
   1201 					   rf_ReconstructInPlaceThread,
   1202 					   rrcopy,"raid_reconip");
   1203 		return(retcode);
   1204 
   1205 	case RAIDFRAME_GET_INFO:
   1206 		if (!raidPtr->valid)
   1207 			return (ENODEV);
   1208 		ucfgp = (RF_DeviceConfig_t **) data;
   1209 		RF_Malloc(d_cfg, sizeof(RF_DeviceConfig_t),
   1210 			  (RF_DeviceConfig_t *));
   1211 		if (d_cfg == NULL)
   1212 			return (ENOMEM);
   1213 		memset((char *) d_cfg, 0, sizeof(RF_DeviceConfig_t));
   1214 		d_cfg->rows = raidPtr->numRow;
   1215 		d_cfg->cols = raidPtr->numCol;
   1216 		d_cfg->ndevs = raidPtr->numRow * raidPtr->numCol;
   1217 		if (d_cfg->ndevs >= RF_MAX_DISKS) {
   1218 			RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
   1219 			return (ENOMEM);
   1220 		}
   1221 		d_cfg->nspares = raidPtr->numSpare;
   1222 		if (d_cfg->nspares >= RF_MAX_DISKS) {
   1223 			RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
   1224 			return (ENOMEM);
   1225 		}
   1226 		d_cfg->maxqdepth = raidPtr->maxQueueDepth;
   1227 		d = 0;
   1228 		for (i = 0; i < d_cfg->rows; i++) {
   1229 			for (j = 0; j < d_cfg->cols; j++) {
   1230 				d_cfg->devs[d] = raidPtr->Disks[i][j];
   1231 				d++;
   1232 			}
   1233 		}
   1234 		for (j = d_cfg->cols, i = 0; i < d_cfg->nspares; i++, j++) {
   1235 			d_cfg->spares[i] = raidPtr->Disks[0][j];
   1236 		}
   1237 		retcode = copyout((caddr_t) d_cfg, (caddr_t) * ucfgp,
   1238 				  sizeof(RF_DeviceConfig_t));
   1239 		RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
   1240 
   1241 		return (retcode);
   1242 
   1243 	case RAIDFRAME_CHECK_PARITY:
   1244 		*(int *) data = raidPtr->parity_good;
   1245 		return (0);
   1246 
   1247 	case RAIDFRAME_RESET_ACCTOTALS:
   1248 		memset(&raidPtr->acc_totals, 0, sizeof(raidPtr->acc_totals));
   1249 		return (0);
   1250 
   1251 	case RAIDFRAME_GET_ACCTOTALS:
   1252 		totals = (RF_AccTotals_t *) data;
   1253 		*totals = raidPtr->acc_totals;
   1254 		return (0);
   1255 
   1256 	case RAIDFRAME_KEEP_ACCTOTALS:
   1257 		raidPtr->keep_acc_totals = *(int *)data;
   1258 		return (0);
   1259 
   1260 	case RAIDFRAME_GET_SIZE:
   1261 		*(int *) data = raidPtr->totalSectors;
   1262 		return (0);
   1263 
   1264 		/* fail a disk & optionally start reconstruction */
   1265 	case RAIDFRAME_FAIL_DISK:
   1266 
   1267 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1268 			/* Can't do this on a RAID 0!! */
   1269 			return(EINVAL);
   1270 		}
   1271 
   1272 		rr = (struct rf_recon_req *) data;
   1273 
   1274 		if (rr->row < 0 || rr->row >= raidPtr->numRow
   1275 		    || rr->col < 0 || rr->col >= raidPtr->numCol)
   1276 			return (EINVAL);
   1277 
   1278 		printf("raid%d: Failing the disk: row: %d col: %d\n",
   1279 		       unit, rr->row, rr->col);
   1280 
   1281 		/* make a copy of the recon request so that we don't rely on
   1282 		 * the user's buffer */
   1283 		RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
   1284 		if (rrcopy == NULL)
   1285 			return(ENOMEM);
   1286 		memcpy(rrcopy, rr, sizeof(*rr));
   1287 		rrcopy->raidPtr = (void *) raidPtr;
   1288 
   1289 		retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
   1290 					   rf_ReconThread,
   1291 					   rrcopy,"raid_recon");
   1292 		return (0);
   1293 
   1294 		/* invoke a copyback operation after recon on whatever disk
   1295 		 * needs it, if any */
   1296 	case RAIDFRAME_COPYBACK:
   1297 
   1298 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1299 			/* This makes no sense on a RAID 0!! */
   1300 			return(EINVAL);
   1301 		}
   1302 
   1303 		if (raidPtr->copyback_in_progress == 1) {
   1304 			/* Copyback is already in progress! */
   1305 			return(EINVAL);
   1306 		}
   1307 
   1308 		retcode = RF_CREATE_THREAD(raidPtr->copyback_thread,
   1309 					   rf_CopybackThread,
   1310 					   raidPtr,"raid_copyback");
   1311 		return (retcode);
   1312 
   1313 		/* return the percentage completion of reconstruction */
   1314 	case RAIDFRAME_CHECK_RECON_STATUS:
   1315 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1316 			/* This makes no sense on a RAID 0, so tell the
   1317 			   user it's done. */
   1318 			*(int *) data = 100;
   1319 			return(0);
   1320 		}
   1321 		row = 0; /* XXX we only consider a single row... */
   1322 		if (raidPtr->status[row] != rf_rs_reconstructing)
   1323 			*(int *) data = 100;
   1324 		else
   1325 			*(int *) data = raidPtr->reconControl[row]->percentComplete;
   1326 		return (0);
   1327 	case RAIDFRAME_CHECK_RECON_STATUS_EXT:
   1328 		progressInfoPtr = (RF_ProgressInfo_t **) data;
   1329 		row = 0; /* XXX we only consider a single row... */
   1330 		if (raidPtr->status[row] != rf_rs_reconstructing) {
   1331 			progressInfo.remaining = 0;
   1332 			progressInfo.completed = 100;
   1333 			progressInfo.total = 100;
   1334 		} else {
   1335 			progressInfo.total =
   1336 				raidPtr->reconControl[row]->numRUsTotal;
   1337 			progressInfo.completed =
   1338 				raidPtr->reconControl[row]->numRUsComplete;
   1339 			progressInfo.remaining = progressInfo.total -
   1340 				progressInfo.completed;
   1341 		}
   1342 		retcode = copyout((caddr_t) &progressInfo,
   1343 				  (caddr_t) *progressInfoPtr,
   1344 				  sizeof(RF_ProgressInfo_t));
   1345 		return (retcode);
   1346 
   1347 	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
   1348 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1349 			/* This makes no sense on a RAID 0, so tell the
   1350 			   user it's done. */
   1351 			*(int *) data = 100;
   1352 			return(0);
   1353 		}
   1354 		if (raidPtr->parity_rewrite_in_progress == 1) {
   1355 			*(int *) data = 100 *
   1356 				raidPtr->parity_rewrite_stripes_done /
   1357 				raidPtr->Layout.numStripe;
   1358 		} else {
   1359 			*(int *) data = 100;
   1360 		}
   1361 		return (0);
   1362 
   1363 	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
   1364 		progressInfoPtr = (RF_ProgressInfo_t **) data;
   1365 		if (raidPtr->parity_rewrite_in_progress == 1) {
   1366 			progressInfo.total = raidPtr->Layout.numStripe;
   1367 			progressInfo.completed =
   1368 				raidPtr->parity_rewrite_stripes_done;
   1369 			progressInfo.remaining = progressInfo.total -
   1370 				progressInfo.completed;
   1371 		} else {
   1372 			progressInfo.remaining = 0;
   1373 			progressInfo.completed = 100;
   1374 			progressInfo.total = 100;
   1375 		}
   1376 		retcode = copyout((caddr_t) &progressInfo,
   1377 				  (caddr_t) *progressInfoPtr,
   1378 				  sizeof(RF_ProgressInfo_t));
   1379 		return (retcode);
   1380 
   1381 	case RAIDFRAME_CHECK_COPYBACK_STATUS:
   1382 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1383 			/* This makes no sense on a RAID 0 */
   1384 			*(int *) data = 100;
   1385 			return(0);
   1386 		}
   1387 		if (raidPtr->copyback_in_progress == 1) {
   1388 			*(int *) data = 100 * raidPtr->copyback_stripes_done /
   1389 				raidPtr->Layout.numStripe;
   1390 		} else {
   1391 			*(int *) data = 100;
   1392 		}
   1393 		return (0);
   1394 
   1395 	case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
   1396 		progressInfoPtr = (RF_ProgressInfo_t **) data;
   1397 		if (raidPtr->copyback_in_progress == 1) {
   1398 			progressInfo.total = raidPtr->Layout.numStripe;
   1399 			progressInfo.completed =
   1400 				raidPtr->copyback_stripes_done;
   1401 			progressInfo.remaining = progressInfo.total -
   1402 				progressInfo.completed;
   1403 		} else {
   1404 			progressInfo.remaining = 0;
   1405 			progressInfo.completed = 100;
   1406 			progressInfo.total = 100;
   1407 		}
   1408 		retcode = copyout((caddr_t) &progressInfo,
   1409 				  (caddr_t) *progressInfoPtr,
   1410 				  sizeof(RF_ProgressInfo_t));
   1411 		return (retcode);
   1412 
   1413 		/* the sparetable daemon calls this to wait for the kernel to
   1414 		 * need a spare table. this ioctl does not return until a
   1415 		 * spare table is needed. XXX -- calling mpsleep here in the
   1416 		 * ioctl code is almost certainly wrong and evil. -- XXX XXX
   1417 		 * -- I should either compute the spare table in the kernel,
   1418 		 * or have a different -- XXX XXX -- interface (a different
   1419 		 * character device) for delivering the table     -- XXX */
   1420 #if 0
   1421 	case RAIDFRAME_SPARET_WAIT:
   1422 		RF_LOCK_MUTEX(rf_sparet_wait_mutex);
   1423 		while (!rf_sparet_wait_queue)
   1424 			mpsleep(&rf_sparet_wait_queue, (PZERO + 1) | PCATCH, "sparet wait", 0, (void *) simple_lock_addr(rf_sparet_wait_mutex), MS_LOCK_SIMPLE);
   1425 		waitreq = rf_sparet_wait_queue;
   1426 		rf_sparet_wait_queue = rf_sparet_wait_queue->next;
   1427 		RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
   1428 
   1429 		/* structure assignment */
   1430 		*((RF_SparetWait_t *) data) = *waitreq;
   1431 
   1432 		RF_Free(waitreq, sizeof(*waitreq));
   1433 		return (0);
   1434 
   1435 		/* wakes up a process waiting on SPARET_WAIT and puts an error
   1436 		 * code in it that will cause the dameon to exit */
   1437 	case RAIDFRAME_ABORT_SPARET_WAIT:
   1438 		RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
   1439 		waitreq->fcol = -1;
   1440 		RF_LOCK_MUTEX(rf_sparet_wait_mutex);
   1441 		waitreq->next = rf_sparet_wait_queue;
   1442 		rf_sparet_wait_queue = waitreq;
   1443 		RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
   1444 		wakeup(&rf_sparet_wait_queue);
   1445 		return (0);
   1446 
   1447 		/* used by the spare table daemon to deliver a spare table
   1448 		 * into the kernel */
   1449 	case RAIDFRAME_SEND_SPARET:
   1450 
   1451 		/* install the spare table */
   1452 		retcode = rf_SetSpareTable(raidPtr, *(void **) data);
   1453 
   1454 		/* respond to the requestor.  the return status of the spare
   1455 		 * table installation is passed in the "fcol" field */
   1456 		RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
   1457 		waitreq->fcol = retcode;
   1458 		RF_LOCK_MUTEX(rf_sparet_wait_mutex);
   1459 		waitreq->next = rf_sparet_resp_queue;
   1460 		rf_sparet_resp_queue = waitreq;
   1461 		wakeup(&rf_sparet_resp_queue);
   1462 		RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
   1463 
   1464 		return (retcode);
   1465 #endif
   1466 
   1467 	default:
   1468 		break; /* fall through to the os-specific code below */
   1469 
   1470 	}
   1471 
   1472 	if (!raidPtr->valid)
   1473 		return (EINVAL);
   1474 
   1475 	/*
   1476 	 * Add support for "regular" device ioctls here.
   1477 	 */
   1478 
   1479 	switch (cmd) {
   1480 	case DIOCGDINFO:
   1481 		*(struct disklabel *) data = *(rs->sc_dkdev.dk_label);
   1482 		break;
   1483 #ifdef __HAVE_OLD_DISKLABEL
   1484 	case ODIOCGDINFO:
   1485 		newlabel = *(rs->sc_dkdev.dk_label);
   1486 		if (newlabel.d_npartitions > OLDMAXPARTITIONS)
   1487 			return ENOTTY;
   1488 		memcpy(data, &newlabel, sizeof (struct olddisklabel));
   1489 		break;
   1490 #endif
   1491 
   1492 	case DIOCGPART:
   1493 		((struct partinfo *) data)->disklab = rs->sc_dkdev.dk_label;
   1494 		((struct partinfo *) data)->part =
   1495 		    &rs->sc_dkdev.dk_label->d_partitions[DISKPART(dev)];
   1496 		break;
   1497 
   1498 	case DIOCWDINFO:
   1499 	case DIOCSDINFO:
   1500 #ifdef __HAVE_OLD_DISKLABEL
   1501 	case ODIOCWDINFO:
   1502 	case ODIOCSDINFO:
   1503 #endif
   1504 	{
   1505 		struct disklabel *lp;
   1506 #ifdef __HAVE_OLD_DISKLABEL
   1507 		if (cmd == ODIOCSDINFO || cmd == ODIOCWDINFO) {
   1508 			memset(&newlabel, 0, sizeof newlabel);
   1509 			memcpy(&newlabel, data, sizeof (struct olddisklabel));
   1510 			lp = &newlabel;
   1511 		} else
   1512 #endif
   1513 		lp = (struct disklabel *)data;
   1514 
   1515 		if ((error = raidlock(rs)) != 0)
   1516 			return (error);
   1517 
   1518 		rs->sc_flags |= RAIDF_LABELLING;
   1519 
   1520 		error = setdisklabel(rs->sc_dkdev.dk_label,
   1521 		    lp, 0, rs->sc_dkdev.dk_cpulabel);
   1522 		if (error == 0) {
   1523 			if (cmd == DIOCWDINFO
   1524 #ifdef __HAVE_OLD_DISKLABEL
   1525 			    || cmd == ODIOCWDINFO
   1526 #endif
   1527 			   )
   1528 				error = writedisklabel(RAIDLABELDEV(dev),
   1529 				    raidstrategy, rs->sc_dkdev.dk_label,
   1530 				    rs->sc_dkdev.dk_cpulabel);
   1531 		}
   1532 		rs->sc_flags &= ~RAIDF_LABELLING;
   1533 
   1534 		raidunlock(rs);
   1535 
   1536 		if (error)
   1537 			return (error);
   1538 		break;
   1539 	}
   1540 
   1541 	case DIOCWLABEL:
   1542 		if (*(int *) data != 0)
   1543 			rs->sc_flags |= RAIDF_WLABEL;
   1544 		else
   1545 			rs->sc_flags &= ~RAIDF_WLABEL;
   1546 		break;
   1547 
   1548 	case DIOCGDEFLABEL:
   1549 		raidgetdefaultlabel(raidPtr, rs, (struct disklabel *) data);
   1550 		break;
   1551 
   1552 #ifdef __HAVE_OLD_DISKLABEL
   1553 	case ODIOCGDEFLABEL:
   1554 		raidgetdefaultlabel(raidPtr, rs, &newlabel);
   1555 		if (newlabel.d_npartitions > OLDMAXPARTITIONS)
   1556 			return ENOTTY;
   1557 		memcpy(data, &newlabel, sizeof (struct olddisklabel));
   1558 		break;
   1559 #endif
   1560 
   1561 	default:
   1562 		retcode = ENOTTY;
   1563 	}
   1564 	return (retcode);
   1565 
   1566 }
   1567 
   1568 
   1569 /* raidinit -- complete the rest of the initialization for the
   1570    RAIDframe device.  */
   1571 
   1572 
   1573 static void
   1574 raidinit(raidPtr)
   1575 	RF_Raid_t *raidPtr;
   1576 {
   1577 	struct raid_softc *rs;
   1578 	int     unit;
   1579 
   1580 	unit = raidPtr->raidid;
   1581 
   1582 	rs = &raid_softc[unit];
   1583 
   1584 	/* XXX should check return code first... */
   1585 	rs->sc_flags |= RAIDF_INITED;
   1586 
   1587 	sprintf(rs->sc_xname, "raid%d", unit);	/* XXX doesn't check bounds. */
   1588 
   1589 	rs->sc_dkdev.dk_name = rs->sc_xname;
   1590 
   1591 	/* disk_attach actually creates space for the CPU disklabel, among
   1592 	 * other things, so it's critical to call this *BEFORE* we try putzing
   1593 	 * with disklabels. */
   1594 
   1595 	disk_attach(&rs->sc_dkdev);
   1596 
   1597 	/* XXX There may be a weird interaction here between this, and
   1598 	 * protectedSectors, as used in RAIDframe.  */
   1599 
   1600 	rs->sc_size = raidPtr->totalSectors;
   1601 
   1602 }
   1603 
   1604 /* wake up the daemon & tell it to get us a spare table
   1605  * XXX
   1606  * the entries in the queues should be tagged with the raidPtr
   1607  * so that in the extremely rare case that two recons happen at once,
   1608  * we know for which device were requesting a spare table
   1609  * XXX
   1610  *
   1611  * XXX This code is not currently used. GO
   1612  */
   1613 int
   1614 rf_GetSpareTableFromDaemon(req)
   1615 	RF_SparetWait_t *req;
   1616 {
   1617 	int     retcode;
   1618 
   1619 	RF_LOCK_MUTEX(rf_sparet_wait_mutex);
   1620 	req->next = rf_sparet_wait_queue;
   1621 	rf_sparet_wait_queue = req;
   1622 	wakeup(&rf_sparet_wait_queue);
   1623 
   1624 	/* mpsleep unlocks the mutex */
   1625 	while (!rf_sparet_resp_queue) {
   1626 		tsleep(&rf_sparet_resp_queue, PRIBIO,
   1627 		    "raidframe getsparetable", 0);
   1628 	}
   1629 	req = rf_sparet_resp_queue;
   1630 	rf_sparet_resp_queue = req->next;
   1631 	RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
   1632 
   1633 	retcode = req->fcol;
   1634 	RF_Free(req, sizeof(*req));	/* this is not the same req as we
   1635 					 * alloc'd */
   1636 	return (retcode);
   1637 }
   1638 
   1639 /* a wrapper around rf_DoAccess that extracts appropriate info from the
   1640  * bp & passes it down.
   1641  * any calls originating in the kernel must use non-blocking I/O
   1642  * do some extra sanity checking to return "appropriate" error values for
   1643  * certain conditions (to make some standard utilities work)
   1644  *
   1645  * Formerly known as: rf_DoAccessKernel
   1646  */
   1647 void
   1648 raidstart(raidPtr)
   1649 	RF_Raid_t *raidPtr;
   1650 {
   1651 	RF_SectorCount_t num_blocks, pb, sum;
   1652 	RF_RaidAddr_t raid_addr;
   1653 	int     retcode;
   1654 	struct partition *pp;
   1655 	daddr_t blocknum;
   1656 	int     unit;
   1657 	struct raid_softc *rs;
   1658 	int     do_async;
   1659 	struct buf *bp;
   1660 
   1661 	unit = raidPtr->raidid;
   1662 	rs = &raid_softc[unit];
   1663 
   1664 	/* quick check to see if anything has died recently */
   1665 	RF_LOCK_MUTEX(raidPtr->mutex);
   1666 	if (raidPtr->numNewFailures > 0) {
   1667 		rf_update_component_labels(raidPtr,
   1668 					   RF_NORMAL_COMPONENT_UPDATE);
   1669 		raidPtr->numNewFailures--;
   1670 	}
   1671 
   1672 	/* Check to see if we're at the limit... */
   1673 	while (raidPtr->openings > 0) {
   1674 		RF_UNLOCK_MUTEX(raidPtr->mutex);
   1675 
   1676 		/* get the next item, if any, from the queue */
   1677 		if ((bp = BUFQ_GET(&rs->buf_queue)) == NULL) {
   1678 			/* nothing more to do */
   1679 			return;
   1680 		}
   1681 
   1682 		/* Ok, for the bp we have here, bp->b_blkno is relative to the
   1683 		 * partition.. Need to make it absolute to the underlying
   1684 		 * device.. */
   1685 
   1686 		blocknum = bp->b_blkno;
   1687 		if (DISKPART(bp->b_dev) != RAW_PART) {
   1688 			pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)];
   1689 			blocknum += pp->p_offset;
   1690 		}
   1691 
   1692 		db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno,
   1693 			    (int) blocknum));
   1694 
   1695 		db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
   1696 		db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));
   1697 
   1698 		/* *THIS* is where we adjust what block we're going to...
   1699 		 * but DO NOT TOUCH bp->b_blkno!!! */
   1700 		raid_addr = blocknum;
   1701 
   1702 		num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
   1703 		pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
   1704 		sum = raid_addr + num_blocks + pb;
   1705 		if (1 || rf_debugKernelAccess) {
   1706 			db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
   1707 				    (int) raid_addr, (int) sum, (int) num_blocks,
   1708 				    (int) pb, (int) bp->b_resid));
   1709 		}
   1710 		if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
   1711 		    || (sum < num_blocks) || (sum < pb)) {
   1712 			bp->b_error = ENOSPC;
   1713 			bp->b_flags |= B_ERROR;
   1714 			bp->b_resid = bp->b_bcount;
   1715 			biodone(bp);
   1716 			RF_LOCK_MUTEX(raidPtr->mutex);
   1717 			continue;
   1718 		}
   1719 		/*
   1720 		 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
   1721 		 */
   1722 
   1723 		if (bp->b_bcount & raidPtr->sectorMask) {
   1724 			bp->b_error = EINVAL;
   1725 			bp->b_flags |= B_ERROR;
   1726 			bp->b_resid = bp->b_bcount;
   1727 			biodone(bp);
   1728 			RF_LOCK_MUTEX(raidPtr->mutex);
   1729 			continue;
   1730 
   1731 		}
   1732 		db1_printf(("Calling DoAccess..\n"));
   1733 
   1734 
   1735 		RF_LOCK_MUTEX(raidPtr->mutex);
   1736 		raidPtr->openings--;
   1737 		RF_UNLOCK_MUTEX(raidPtr->mutex);
   1738 
   1739 		/*
   1740 		 * Everything is async.
   1741 		 */
   1742 		do_async = 1;
   1743 
   1744 		disk_busy(&rs->sc_dkdev);
   1745 
   1746 		/* XXX we're still at splbio() here... do we *really*
   1747 		   need to be? */
   1748 
   1749 		/* don't ever condition on bp->b_flags & B_WRITE.
   1750 		 * always condition on B_READ instead */
   1751 
   1752 		retcode = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
   1753 				      RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
   1754 				      do_async, raid_addr, num_blocks,
   1755 				      bp->b_data, bp, RF_DAG_NONBLOCKING_IO);
   1756 
   1757 		RF_LOCK_MUTEX(raidPtr->mutex);
   1758 	}
   1759 	RF_UNLOCK_MUTEX(raidPtr->mutex);
   1760 }
   1761 
   1762 
   1763 
   1764 
   1765 /* invoke an I/O from kernel mode.  Disk queue should be locked upon entry */
   1766 
   1767 int
   1768 rf_DispatchKernelIO(queue, req)
   1769 	RF_DiskQueue_t *queue;
   1770 	RF_DiskQueueData_t *req;
   1771 {
   1772 	int     op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
   1773 	struct buf *bp;
   1774 	struct raidbuf *raidbp = NULL;
   1775 
   1776 	req->queue = queue;
   1777 
   1778 #if DIAGNOSTIC
   1779 	if (queue->raidPtr->raidid >= numraid) {
   1780 		printf("Invalid unit number: %d %d\n", queue->raidPtr->raidid,
   1781 		    numraid);
   1782 		panic("Invalid Unit number in rf_DispatchKernelIO");
   1783 	}
   1784 #endif
   1785 
   1786 	bp = req->bp;
   1787 #if 1
   1788 	/* XXX when there is a physical disk failure, someone is passing us a
   1789 	 * buffer that contains old stuff!!  Attempt to deal with this problem
   1790 	 * without taking a performance hit... (not sure where the real bug
   1791 	 * is.  It's buried in RAIDframe somewhere) :-(  GO ) */
   1792 
   1793 	if (bp->b_flags & B_ERROR) {
   1794 		bp->b_flags &= ~B_ERROR;
   1795 	}
   1796 	if (bp->b_error != 0) {
   1797 		bp->b_error = 0;
   1798 	}
   1799 #endif
   1800 	raidbp = pool_get(&raidframe_cbufpool, PR_NOWAIT);
   1801 
   1802 	/*
   1803 	 * context for raidiodone
   1804 	 */
   1805 	raidbp->rf_obp = bp;
   1806 	raidbp->req = req;
   1807 
   1808 	LIST_INIT(&raidbp->rf_buf.b_dep);
   1809 
   1810 	switch (req->type) {
   1811 	case RF_IO_TYPE_NOP:	/* used primarily to unlock a locked queue */
   1812 		/* XXX need to do something extra here.. */
   1813 		/* I'm leaving this in, as I've never actually seen it used,
   1814 		 * and I'd like folks to report it... GO */
   1815 		printf(("WAKEUP CALLED\n"));
   1816 		queue->numOutstanding++;
   1817 
   1818 		/* XXX need to glue the original buffer into this??  */
   1819 
   1820 		KernelWakeupFunc(&raidbp->rf_buf);
   1821 		break;
   1822 
   1823 	case RF_IO_TYPE_READ:
   1824 	case RF_IO_TYPE_WRITE:
   1825 
   1826 		if (req->tracerec) {
   1827 			RF_ETIMER_START(req->tracerec->timer);
   1828 		}
   1829 		InitBP(&raidbp->rf_buf, queue->rf_cinfo->ci_vp,
   1830 		    op | bp->b_flags, queue->rf_cinfo->ci_dev,
   1831 		    req->sectorOffset, req->numSector,
   1832 		    req->buf, KernelWakeupFunc, (void *) req,
   1833 		    queue->raidPtr->logBytesPerSector, req->b_proc);
   1834 
   1835 		if (rf_debugKernelAccess) {
   1836 			db1_printf(("dispatch: bp->b_blkno = %ld\n",
   1837 				(long) bp->b_blkno));
   1838 		}
   1839 		queue->numOutstanding++;
   1840 		queue->last_deq_sector = req->sectorOffset;
   1841 		/* acc wouldn't have been let in if there were any pending
   1842 		 * reqs at any other priority */
   1843 		queue->curPriority = req->priority;
   1844 
   1845 		db1_printf(("Going for %c to unit %d row %d col %d\n",
   1846 			    req->type, queue->raidPtr->raidid,
   1847 			    queue->row, queue->col));
   1848 		db1_printf(("sector %d count %d (%d bytes) %d\n",
   1849 			(int) req->sectorOffset, (int) req->numSector,
   1850 			(int) (req->numSector <<
   1851 			    queue->raidPtr->logBytesPerSector),
   1852 			(int) queue->raidPtr->logBytesPerSector));
   1853 		if ((raidbp->rf_buf.b_flags & B_READ) == 0) {
   1854 			raidbp->rf_buf.b_vp->v_numoutput++;
   1855 		}
   1856 		VOP_STRATEGY(&raidbp->rf_buf);
   1857 
   1858 		break;
   1859 
   1860 	default:
   1861 		panic("bad req->type in rf_DispatchKernelIO");
   1862 	}
   1863 	db1_printf(("Exiting from DispatchKernelIO\n"));
   1864 
   1865 	return (0);
   1866 }
   1867 /* this is the callback function associated with a I/O invoked from
   1868    kernel code.
   1869  */
   1870 static void
   1871 KernelWakeupFunc(vbp)
   1872 	struct buf *vbp;
   1873 {
   1874 	RF_DiskQueueData_t *req = NULL;
   1875 	RF_DiskQueue_t *queue;
   1876 	struct raidbuf *raidbp = (struct raidbuf *) vbp;
   1877 	struct buf *bp;
   1878 	int s;
   1879 
   1880 	s = splbio();
   1881 	db1_printf(("recovering the request queue:\n"));
   1882 	req = raidbp->req;
   1883 
   1884 	bp = raidbp->rf_obp;
   1885 
   1886 	queue = (RF_DiskQueue_t *) req->queue;
   1887 
   1888 	if (raidbp->rf_buf.b_flags & B_ERROR) {
   1889 		bp->b_flags |= B_ERROR;
   1890 		bp->b_error = raidbp->rf_buf.b_error ?
   1891 		    raidbp->rf_buf.b_error : EIO;
   1892 	}
   1893 
   1894 	/* XXX methinks this could be wrong... */
   1895 #if 1
   1896 	bp->b_resid = raidbp->rf_buf.b_resid;
   1897 #endif
   1898 
   1899 	if (req->tracerec) {
   1900 		RF_ETIMER_STOP(req->tracerec->timer);
   1901 		RF_ETIMER_EVAL(req->tracerec->timer);
   1902 		RF_LOCK_MUTEX(rf_tracing_mutex);
   1903 		req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
   1904 		req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
   1905 		req->tracerec->num_phys_ios++;
   1906 		RF_UNLOCK_MUTEX(rf_tracing_mutex);
   1907 	}
   1908 	bp->b_bcount = raidbp->rf_buf.b_bcount;	/* XXXX ?? */
   1909 
   1910 	/* XXX Ok, let's get aggressive... If B_ERROR is set, let's go
   1911 	 * ballistic, and mark the component as hosed... */
   1912 
   1913 	if (bp->b_flags & B_ERROR) {
   1914 		/* Mark the disk as dead */
   1915 		/* but only mark it once... */
   1916 		if (queue->raidPtr->Disks[queue->row][queue->col].status ==
   1917 		    rf_ds_optimal) {
   1918 			printf("raid%d: IO Error.  Marking %s as failed.\n",
   1919 			       queue->raidPtr->raidid,
   1920 			       queue->raidPtr->Disks[queue->row][queue->col].devname);
   1921 			queue->raidPtr->Disks[queue->row][queue->col].status =
   1922 			    rf_ds_failed;
   1923 			queue->raidPtr->status[queue->row] = rf_rs_degraded;
   1924 			queue->raidPtr->numFailures++;
   1925 			queue->raidPtr->numNewFailures++;
   1926 		} else {	/* Disk is already dead... */
   1927 			/* printf("Disk already marked as dead!\n"); */
   1928 		}
   1929 
   1930 	}
   1931 
   1932 	pool_put(&raidframe_cbufpool, raidbp);
   1933 
   1934 	/* Fill in the error value */
   1935 
   1936 	req->error = (bp->b_flags & B_ERROR) ? bp->b_error : 0;
   1937 
   1938 	simple_lock(&queue->raidPtr->iodone_lock);
   1939 
   1940 	/* Drop this one on the "finished" queue... */
   1941 	TAILQ_INSERT_TAIL(&(queue->raidPtr->iodone), req, iodone_entries);
   1942 
   1943 	/* Let the raidio thread know there is work to be done. */
   1944 	wakeup(&(queue->raidPtr->iodone));
   1945 
   1946 	simple_unlock(&queue->raidPtr->iodone_lock);
   1947 
   1948 	splx(s);
   1949 }
   1950 
   1951 
   1952 
   1953 /*
   1954  * initialize a buf structure for doing an I/O in the kernel.
   1955  */
   1956 static void
   1957 InitBP(bp, b_vp, rw_flag, dev, startSect, numSect, buf, cbFunc, cbArg,
   1958        logBytesPerSector, b_proc)
   1959 	struct buf *bp;
   1960 	struct vnode *b_vp;
   1961 	unsigned rw_flag;
   1962 	dev_t dev;
   1963 	RF_SectorNum_t startSect;
   1964 	RF_SectorCount_t numSect;
   1965 	caddr_t buf;
   1966 	void (*cbFunc) (struct buf *);
   1967 	void *cbArg;
   1968 	int logBytesPerSector;
   1969 	struct proc *b_proc;
   1970 {
   1971 	/* bp->b_flags       = B_PHYS | rw_flag; */
   1972 	bp->b_flags = B_CALL | rw_flag;	/* XXX need B_PHYS here too??? */
   1973 	bp->b_bcount = numSect << logBytesPerSector;
   1974 	bp->b_bufsize = bp->b_bcount;
   1975 	bp->b_error = 0;
   1976 	bp->b_dev = dev;
   1977 	bp->b_data = buf;
   1978 	bp->b_blkno = startSect;
   1979 	bp->b_resid = bp->b_bcount;	/* XXX is this right!??!?!! */
   1980 	if (bp->b_bcount == 0) {
   1981 		panic("bp->b_bcount is zero in InitBP!!");
   1982 	}
   1983 	bp->b_proc = b_proc;
   1984 	bp->b_iodone = cbFunc;
   1985 	bp->b_vp = b_vp;
   1986 
   1987 }
   1988 
   1989 static void
   1990 raidgetdefaultlabel(raidPtr, rs, lp)
   1991 	RF_Raid_t *raidPtr;
   1992 	struct raid_softc *rs;
   1993 	struct disklabel *lp;
   1994 {
   1995 	db1_printf(("Building a default label...\n"));
   1996 	memset(lp, 0, sizeof(*lp));
   1997 
   1998 	/* fabricate a label... */
   1999 	lp->d_secperunit = raidPtr->totalSectors;
   2000 	lp->d_secsize = raidPtr->bytesPerSector;
   2001 	lp->d_nsectors = raidPtr->Layout.dataSectorsPerStripe;
   2002 	lp->d_ntracks = 4 * raidPtr->numCol;
   2003 	lp->d_ncylinders = raidPtr->totalSectors /
   2004 		(lp->d_nsectors * lp->d_ntracks);
   2005 	lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
   2006 
   2007 	strncpy(lp->d_typename, "raid", sizeof(lp->d_typename));
   2008 	lp->d_type = DTYPE_RAID;
   2009 	strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
   2010 	lp->d_rpm = 3600;
   2011 	lp->d_interleave = 1;
   2012 	lp->d_flags = 0;
   2013 
   2014 	lp->d_partitions[RAW_PART].p_offset = 0;
   2015 	lp->d_partitions[RAW_PART].p_size = raidPtr->totalSectors;
   2016 	lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
   2017 	lp->d_npartitions = RAW_PART + 1;
   2018 
   2019 	lp->d_magic = DISKMAGIC;
   2020 	lp->d_magic2 = DISKMAGIC;
   2021 	lp->d_checksum = dkcksum(rs->sc_dkdev.dk_label);
   2022 
   2023 }
   2024 /*
   2025  * Read the disklabel from the raid device.  If one is not present, fake one
   2026  * up.
   2027  */
   2028 static void
   2029 raidgetdisklabel(dev)
   2030 	dev_t   dev;
   2031 {
   2032 	int     unit = raidunit(dev);
   2033 	struct raid_softc *rs = &raid_softc[unit];
   2034 	char   *errstring;
   2035 	struct disklabel *lp = rs->sc_dkdev.dk_label;
   2036 	struct cpu_disklabel *clp = rs->sc_dkdev.dk_cpulabel;
   2037 	RF_Raid_t *raidPtr;
   2038 
   2039 	db1_printf(("Getting the disklabel...\n"));
   2040 
   2041 	memset(clp, 0, sizeof(*clp));
   2042 
   2043 	raidPtr = raidPtrs[unit];
   2044 
   2045 	raidgetdefaultlabel(raidPtr, rs, lp);
   2046 
   2047 	/*
   2048 	 * Call the generic disklabel extraction routine.
   2049 	 */
   2050 	errstring = readdisklabel(RAIDLABELDEV(dev), raidstrategy,
   2051 	    rs->sc_dkdev.dk_label, rs->sc_dkdev.dk_cpulabel);
   2052 	if (errstring)
   2053 		raidmakedisklabel(rs);
   2054 	else {
   2055 		int     i;
   2056 		struct partition *pp;
   2057 
   2058 		/*
   2059 		 * Sanity check whether the found disklabel is valid.
   2060 		 *
   2061 		 * This is necessary since total size of the raid device
   2062 		 * may vary when an interleave is changed even though exactly
   2063 		 * same componets are used, and old disklabel may used
   2064 		 * if that is found.
   2065 		 */
   2066 		if (lp->d_secperunit != rs->sc_size)
   2067 			printf("raid%d: WARNING: %s: "
   2068 			    "total sector size in disklabel (%d) != "
   2069 			    "the size of raid (%ld)\n", unit, rs->sc_xname,
   2070 			    lp->d_secperunit, (long) rs->sc_size);
   2071 		for (i = 0; i < lp->d_npartitions; i++) {
   2072 			pp = &lp->d_partitions[i];
   2073 			if (pp->p_offset + pp->p_size > rs->sc_size)
   2074 				printf("raid%d: WARNING: %s: end of partition `%c' "
   2075 				       "exceeds the size of raid (%ld)\n",
   2076 				       unit, rs->sc_xname, 'a' + i, (long) rs->sc_size);
   2077 		}
   2078 	}
   2079 
   2080 }
   2081 /*
   2082  * Take care of things one might want to take care of in the event
   2083  * that a disklabel isn't present.
   2084  */
   2085 static void
   2086 raidmakedisklabel(rs)
   2087 	struct raid_softc *rs;
   2088 {
   2089 	struct disklabel *lp = rs->sc_dkdev.dk_label;
   2090 	db1_printf(("Making a label..\n"));
   2091 
   2092 	/*
   2093 	 * For historical reasons, if there's no disklabel present
   2094 	 * the raw partition must be marked FS_BSDFFS.
   2095 	 */
   2096 
   2097 	lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;
   2098 
   2099 	strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));
   2100 
   2101 	lp->d_checksum = dkcksum(lp);
   2102 }
   2103 /*
   2104  * Lookup the provided name in the filesystem.  If the file exists,
   2105  * is a valid block device, and isn't being used by anyone else,
   2106  * set *vpp to the file's vnode.
   2107  * You'll find the original of this in ccd.c
   2108  */
   2109 int
   2110 raidlookup(path, p, vpp)
   2111 	char   *path;
   2112 	struct proc *p;
   2113 	struct vnode **vpp;	/* result */
   2114 {
   2115 	struct nameidata nd;
   2116 	struct vnode *vp;
   2117 	struct vattr va;
   2118 	int     error;
   2119 
   2120 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, path, p);
   2121 	if ((error = vn_open(&nd, FREAD | FWRITE, 0)) != 0) {
   2122 #if 0
   2123 		printf("RAIDframe: vn_open returned %d\n", error);
   2124 #endif
   2125 		return (error);
   2126 	}
   2127 	vp = nd.ni_vp;
   2128 	if (vp->v_usecount > 1) {
   2129 		VOP_UNLOCK(vp, 0);
   2130 		(void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
   2131 		return (EBUSY);
   2132 	}
   2133 	if ((error = VOP_GETATTR(vp, &va, p->p_ucred, p)) != 0) {
   2134 		VOP_UNLOCK(vp, 0);
   2135 		(void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
   2136 		return (error);
   2137 	}
   2138 	/* XXX: eventually we should handle VREG, too. */
   2139 	if (va.va_type != VBLK) {
   2140 		VOP_UNLOCK(vp, 0);
   2141 		(void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
   2142 		return (ENOTBLK);
   2143 	}
   2144 	VOP_UNLOCK(vp, 0);
   2145 	*vpp = vp;
   2146 	return (0);
   2147 }
   2148 /*
   2149  * Wait interruptibly for an exclusive lock.
   2150  *
   2151  * XXX
   2152  * Several drivers do this; it should be abstracted and made MP-safe.
   2153  * (Hmm... where have we seen this warning before :->  GO )
   2154  */
   2155 static int
   2156 raidlock(rs)
   2157 	struct raid_softc *rs;
   2158 {
   2159 	int     error;
   2160 
   2161 	while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
   2162 		rs->sc_flags |= RAIDF_WANTED;
   2163 		if ((error =
   2164 			tsleep(rs, PRIBIO | PCATCH, "raidlck", 0)) != 0)
   2165 			return (error);
   2166 	}
   2167 	rs->sc_flags |= RAIDF_LOCKED;
   2168 	return (0);
   2169 }
   2170 /*
   2171  * Unlock and wake up any waiters.
   2172  */
   2173 static void
   2174 raidunlock(rs)
   2175 	struct raid_softc *rs;
   2176 {
   2177 
   2178 	rs->sc_flags &= ~RAIDF_LOCKED;
   2179 	if ((rs->sc_flags & RAIDF_WANTED) != 0) {
   2180 		rs->sc_flags &= ~RAIDF_WANTED;
   2181 		wakeup(rs);
   2182 	}
   2183 }
   2184 
   2185 
   2186 #define RF_COMPONENT_INFO_OFFSET  16384 /* bytes */
   2187 #define RF_COMPONENT_INFO_SIZE     1024 /* bytes */
   2188 
   2189 int
   2190 raidmarkclean(dev_t dev, struct vnode *b_vp, int mod_counter)
   2191 {
   2192 	RF_ComponentLabel_t clabel;
   2193 	raidread_component_label(dev, b_vp, &clabel);
   2194 	clabel.mod_counter = mod_counter;
   2195 	clabel.clean = RF_RAID_CLEAN;
   2196 	raidwrite_component_label(dev, b_vp, &clabel);
   2197 	return(0);
   2198 }
   2199 
   2200 
   2201 int
   2202 raidmarkdirty(dev_t dev, struct vnode *b_vp, int mod_counter)
   2203 {
   2204 	RF_ComponentLabel_t clabel;
   2205 	raidread_component_label(dev, b_vp, &clabel);
   2206 	clabel.mod_counter = mod_counter;
   2207 	clabel.clean = RF_RAID_DIRTY;
   2208 	raidwrite_component_label(dev, b_vp, &clabel);
   2209 	return(0);
   2210 }
   2211 
   2212 /* ARGSUSED */
   2213 int
   2214 raidread_component_label(dev, b_vp, clabel)
   2215 	dev_t dev;
   2216 	struct vnode *b_vp;
   2217 	RF_ComponentLabel_t *clabel;
   2218 {
   2219 	struct buf *bp;
   2220 	const struct bdevsw *bdev;
   2221 	int error;
   2222 
   2223 	/* XXX should probably ensure that we don't try to do this if
   2224 	   someone has changed rf_protected_sectors. */
   2225 
   2226 	if (b_vp == NULL) {
   2227 		/* For whatever reason, this component is not valid.
   2228 		   Don't try to read a component label from it. */
   2229 		return(EINVAL);
   2230 	}
   2231 
   2232 	/* get a block of the appropriate size... */
   2233 	bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
   2234 	bp->b_dev = dev;
   2235 
   2236 	/* get our ducks in a row for the read */
   2237 	bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
   2238 	bp->b_bcount = RF_COMPONENT_INFO_SIZE;
   2239 	bp->b_flags |= B_READ;
   2240  	bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
   2241 
   2242 	bdev = bdevsw_lookup(bp->b_dev);
   2243 	if (bdev == NULL)
   2244 		return (ENXIO);
   2245 	(*bdev->d_strategy)(bp);
   2246 
   2247 	error = biowait(bp);
   2248 
   2249 	if (!error) {
   2250 		memcpy(clabel, bp->b_data,
   2251 		       sizeof(RF_ComponentLabel_t));
   2252 #if 0
   2253 		rf_print_component_label( clabel );
   2254 #endif
   2255         } else {
   2256 #if 0
   2257 		printf("Failed to read RAID component label!\n");
   2258 #endif
   2259 	}
   2260 
   2261 	brelse(bp);
   2262 	return(error);
   2263 }
   2264 /* ARGSUSED */
   2265 int
   2266 raidwrite_component_label(dev, b_vp, clabel)
   2267 	dev_t dev;
   2268 	struct vnode *b_vp;
   2269 	RF_ComponentLabel_t *clabel;
   2270 {
   2271 	struct buf *bp;
   2272 	const struct bdevsw *bdev;
   2273 	int error;
   2274 
   2275 	/* get a block of the appropriate size... */
   2276 	bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
   2277 	bp->b_dev = dev;
   2278 
   2279 	/* get our ducks in a row for the write */
   2280 	bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
   2281 	bp->b_bcount = RF_COMPONENT_INFO_SIZE;
   2282 	bp->b_flags |= B_WRITE;
   2283  	bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
   2284 
   2285 	memset(bp->b_data, 0, RF_COMPONENT_INFO_SIZE );
   2286 
   2287 	memcpy(bp->b_data, clabel, sizeof(RF_ComponentLabel_t));
   2288 
   2289 	bdev = bdevsw_lookup(bp->b_dev);
   2290 	if (bdev == NULL)
   2291 		return (ENXIO);
   2292 	(*bdev->d_strategy)(bp);
   2293 	error = biowait(bp);
   2294 	brelse(bp);
   2295 	if (error) {
   2296 #if 1
   2297 		printf("Failed to write RAID component info!\n");
   2298 #endif
   2299 	}
   2300 
   2301 	return(error);
   2302 }
   2303 
   2304 void
   2305 rf_markalldirty(raidPtr)
   2306 	RF_Raid_t *raidPtr;
   2307 {
   2308 	RF_ComponentLabel_t clabel;
   2309 	int r,c;
   2310 
   2311 	raidPtr->mod_counter++;
   2312 	for (r = 0; r < raidPtr->numRow; r++) {
   2313 		for (c = 0; c < raidPtr->numCol; c++) {
   2314 			/* we don't want to touch (at all) a disk that has
   2315 			   failed */
   2316 			if (!RF_DEAD_DISK(raidPtr->Disks[r][c].status)) {
   2317 				raidread_component_label(
   2318 					raidPtr->Disks[r][c].dev,
   2319 					raidPtr->raid_cinfo[r][c].ci_vp,
   2320 					&clabel);
   2321 				if (clabel.status == rf_ds_spared) {
   2322 					/* XXX do something special...
   2323 					 but whatever you do, don't
   2324 					 try to access it!! */
   2325 				} else {
   2326 #if 0
   2327 				clabel.status =
   2328 					raidPtr->Disks[r][c].status;
   2329 				raidwrite_component_label(
   2330 					raidPtr->Disks[r][c].dev,
   2331 					raidPtr->raid_cinfo[r][c].ci_vp,
   2332 					&clabel);
   2333 #endif
   2334 				raidmarkdirty(
   2335 				       raidPtr->Disks[r][c].dev,
   2336 				       raidPtr->raid_cinfo[r][c].ci_vp,
   2337 				       raidPtr->mod_counter);
   2338 				}
   2339 			}
   2340 		}
   2341 	}
   2342 	/* printf("Component labels marked dirty.\n"); */
   2343 #if 0
   2344 	for( c = 0; c < raidPtr->numSpare ; c++) {
   2345 		sparecol = raidPtr->numCol + c;
   2346 		if (raidPtr->Disks[r][sparecol].status == rf_ds_used_spare) {
   2347 			/*
   2348 
   2349 			   XXX this is where we get fancy and map this spare
   2350 			   into it's correct spot in the array.
   2351 
   2352 			 */
   2353 			/*
   2354 
   2355 			   we claim this disk is "optimal" if it's
   2356 			   rf_ds_used_spare, as that means it should be
   2357 			   directly substitutable for the disk it replaced.
   2358 			   We note that too...
   2359 
   2360 			 */
   2361 
   2362 			for(i=0;i<raidPtr->numRow;i++) {
   2363 				for(j=0;j<raidPtr->numCol;j++) {
   2364 					if ((raidPtr->Disks[i][j].spareRow ==
   2365 					     r) &&
   2366 					    (raidPtr->Disks[i][j].spareCol ==
   2367 					     sparecol)) {
   2368 						srow = r;
   2369 						scol = sparecol;
   2370 						break;
   2371 					}
   2372 				}
   2373 			}
   2374 
   2375 			raidread_component_label(
   2376 				      raidPtr->Disks[r][sparecol].dev,
   2377 				      raidPtr->raid_cinfo[r][sparecol].ci_vp,
   2378 				      &clabel);
   2379 			/* make sure status is noted */
   2380 			clabel.version = RF_COMPONENT_LABEL_VERSION;
   2381 			clabel.mod_counter = raidPtr->mod_counter;
   2382 			clabel.serial_number = raidPtr->serial_number;
   2383 			clabel.row = srow;
   2384 			clabel.column = scol;
   2385 			clabel.num_rows = raidPtr->numRow;
   2386 			clabel.num_columns = raidPtr->numCol;
   2387 			clabel.clean = RF_RAID_DIRTY; /* changed in a bit*/
   2388 			clabel.status = rf_ds_optimal;
   2389 			raidwrite_component_label(
   2390 				      raidPtr->Disks[r][sparecol].dev,
   2391 				      raidPtr->raid_cinfo[r][sparecol].ci_vp,
   2392 				      &clabel);
   2393 			raidmarkclean( raidPtr->Disks[r][sparecol].dev,
   2394 			              raidPtr->raid_cinfo[r][sparecol].ci_vp);
   2395 		}
   2396 	}
   2397 
   2398 #endif
   2399 }
   2400 
   2401 
   2402 void
   2403 rf_update_component_labels(raidPtr, final)
   2404 	RF_Raid_t *raidPtr;
   2405 	int final;
   2406 {
   2407 	RF_ComponentLabel_t clabel;
   2408 	int sparecol;
   2409 	int r,c;
   2410 	int i,j;
   2411 	int srow, scol;
   2412 
   2413 	srow = -1;
   2414 	scol = -1;
   2415 
   2416 	/* XXX should do extra checks to make sure things really are clean,
   2417 	   rather than blindly setting the clean bit... */
   2418 
   2419 	raidPtr->mod_counter++;
   2420 
   2421 	for (r = 0; r < raidPtr->numRow; r++) {
   2422 		for (c = 0; c < raidPtr->numCol; c++) {
   2423 			if (raidPtr->Disks[r][c].status == rf_ds_optimal) {
   2424 				raidread_component_label(
   2425 					raidPtr->Disks[r][c].dev,
   2426 					raidPtr->raid_cinfo[r][c].ci_vp,
   2427 					&clabel);
   2428 				/* make sure status is noted */
   2429 				clabel.status = rf_ds_optimal;
   2430 				/* bump the counter */
   2431 				clabel.mod_counter = raidPtr->mod_counter;
   2432 
   2433 				raidwrite_component_label(
   2434 					raidPtr->Disks[r][c].dev,
   2435 					raidPtr->raid_cinfo[r][c].ci_vp,
   2436 					&clabel);
   2437 				if (final == RF_FINAL_COMPONENT_UPDATE) {
   2438 					if (raidPtr->parity_good == RF_RAID_CLEAN) {
   2439 						raidmarkclean(
   2440 							      raidPtr->Disks[r][c].dev,
   2441 							      raidPtr->raid_cinfo[r][c].ci_vp,
   2442 							      raidPtr->mod_counter);
   2443 					}
   2444 				}
   2445 			}
   2446 			/* else we don't touch it.. */
   2447 		}
   2448 	}
   2449 
   2450 	for( c = 0; c < raidPtr->numSpare ; c++) {
   2451 		sparecol = raidPtr->numCol + c;
   2452 		/* Need to ensure that the reconstruct actually completed! */
   2453 		if (raidPtr->Disks[0][sparecol].status == rf_ds_used_spare) {
   2454 			/*
   2455 
   2456 			   we claim this disk is "optimal" if it's
   2457 			   rf_ds_used_spare, as that means it should be
   2458 			   directly substitutable for the disk it replaced.
   2459 			   We note that too...
   2460 
   2461 			 */
   2462 
   2463 			for(i=0;i<raidPtr->numRow;i++) {
   2464 				for(j=0;j<raidPtr->numCol;j++) {
   2465 					if ((raidPtr->Disks[i][j].spareRow ==
   2466 					     0) &&
   2467 					    (raidPtr->Disks[i][j].spareCol ==
   2468 					     sparecol)) {
   2469 						srow = i;
   2470 						scol = j;
   2471 						break;
   2472 					}
   2473 				}
   2474 			}
   2475 
   2476 			/* XXX shouldn't *really* need this... */
   2477 			raidread_component_label(
   2478 				      raidPtr->Disks[0][sparecol].dev,
   2479 				      raidPtr->raid_cinfo[0][sparecol].ci_vp,
   2480 				      &clabel);
   2481 			/* make sure status is noted */
   2482 
   2483 			raid_init_component_label(raidPtr, &clabel);
   2484 
   2485 			clabel.mod_counter = raidPtr->mod_counter;
   2486 			clabel.row = srow;
   2487 			clabel.column = scol;
   2488 			clabel.status = rf_ds_optimal;
   2489 
   2490 			raidwrite_component_label(
   2491 				      raidPtr->Disks[0][sparecol].dev,
   2492 				      raidPtr->raid_cinfo[0][sparecol].ci_vp,
   2493 				      &clabel);
   2494 			if (final == RF_FINAL_COMPONENT_UPDATE) {
   2495 				if (raidPtr->parity_good == RF_RAID_CLEAN) {
   2496 					raidmarkclean( raidPtr->Disks[0][sparecol].dev,
   2497 						       raidPtr->raid_cinfo[0][sparecol].ci_vp,
   2498 						       raidPtr->mod_counter);
   2499 				}
   2500 			}
   2501 		}
   2502 	}
   2503 	/* 	printf("Component labels updated\n"); */
   2504 }
   2505 
   2506 void
   2507 rf_close_component(raidPtr, vp, auto_configured)
   2508 	RF_Raid_t *raidPtr;
   2509 	struct vnode *vp;
   2510 	int auto_configured;
   2511 {
   2512 	struct proc *p;
   2513 
   2514 	p = raidPtr->engine_thread;
   2515 
   2516 	if (vp != NULL) {
   2517 		if (auto_configured == 1) {
   2518 			vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
   2519 			VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
   2520 			vput(vp);
   2521 
   2522 		} else {
   2523 			(void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
   2524 		}
   2525 	} else {
   2526 #if 0
   2527 		printf("vnode was NULL\n");
   2528 #endif
   2529 	}
   2530 }
   2531 
   2532 
   2533 void
   2534 rf_UnconfigureVnodes(raidPtr)
   2535 	RF_Raid_t *raidPtr;
   2536 {
   2537 	int r,c;
   2538 	struct vnode *vp;
   2539 	int acd;
   2540 
   2541 
   2542 	/* We take this opportunity to close the vnodes like we should.. */
   2543 
   2544 	for (r = 0; r < raidPtr->numRow; r++) {
   2545 		for (c = 0; c < raidPtr->numCol; c++) {
   2546 #if 0
   2547 			printf("raid%d: Closing vnode for row: %d col: %d\n",
   2548 			       raidPtr->raidid, r, c);
   2549 #endif
   2550 			vp = raidPtr->raid_cinfo[r][c].ci_vp;
   2551 			acd = raidPtr->Disks[r][c].auto_configured;
   2552 			rf_close_component(raidPtr, vp, acd);
   2553 			raidPtr->raid_cinfo[r][c].ci_vp = NULL;
   2554 			raidPtr->Disks[r][c].auto_configured = 0;
   2555 		}
   2556 	}
   2557 	for (r = 0; r < raidPtr->numSpare; r++) {
   2558 #if 0
   2559 		printf("raid%d: Closing vnode for spare: %d\n",
   2560 		       raidPtr->raidid, r);
   2561 #endif
   2562 		vp = raidPtr->raid_cinfo[0][raidPtr->numCol + r].ci_vp;
   2563 		acd = raidPtr->Disks[0][raidPtr->numCol + r].auto_configured;
   2564 		rf_close_component(raidPtr, vp, acd);
   2565 		raidPtr->raid_cinfo[0][raidPtr->numCol + r].ci_vp = NULL;
   2566 		raidPtr->Disks[0][raidPtr->numCol + r].auto_configured = 0;
   2567 	}
   2568 }
   2569 
   2570 
   2571 void
   2572 rf_ReconThread(req)
   2573 	struct rf_recon_req *req;
   2574 {
   2575 	int     s;
   2576 	RF_Raid_t *raidPtr;
   2577 
   2578 	s = splbio();
   2579 	raidPtr = (RF_Raid_t *) req->raidPtr;
   2580 	raidPtr->recon_in_progress = 1;
   2581 
   2582 	rf_FailDisk((RF_Raid_t *) req->raidPtr, req->row, req->col,
   2583 		    ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));
   2584 
   2585 	/* XXX get rid of this! we don't need it at all.. */
   2586 	RF_Free(req, sizeof(*req));
   2587 
   2588 	raidPtr->recon_in_progress = 0;
   2589 	splx(s);
   2590 
   2591 	/* That's all... */
   2592 	kthread_exit(0);        /* does not return */
   2593 }
   2594 
   2595 void
   2596 rf_RewriteParityThread(raidPtr)
   2597 	RF_Raid_t *raidPtr;
   2598 {
   2599 	int retcode;
   2600 	int s;
   2601 
   2602 	raidPtr->parity_rewrite_in_progress = 1;
   2603 	s = splbio();
   2604 	retcode = rf_RewriteParity(raidPtr);
   2605 	splx(s);
   2606 	if (retcode) {
   2607 		printf("raid%d: Error re-writing parity!\n",raidPtr->raidid);
   2608 	} else {
   2609 		/* set the clean bit!  If we shutdown correctly,
   2610 		   the clean bit on each component label will get
   2611 		   set */
   2612 		raidPtr->parity_good = RF_RAID_CLEAN;
   2613 	}
   2614 	raidPtr->parity_rewrite_in_progress = 0;
   2615 
   2616 	/* Anyone waiting for us to stop?  If so, inform them... */
   2617 	if (raidPtr->waitShutdown) {
   2618 		wakeup(&raidPtr->parity_rewrite_in_progress);
   2619 	}
   2620 
   2621 	/* That's all... */
   2622 	kthread_exit(0);        /* does not return */
   2623 }
   2624 
   2625 
   2626 void
   2627 rf_CopybackThread(raidPtr)
   2628 	RF_Raid_t *raidPtr;
   2629 {
   2630 	int s;
   2631 
   2632 	raidPtr->copyback_in_progress = 1;
   2633 	s = splbio();
   2634 	rf_CopybackReconstructedData(raidPtr);
   2635 	splx(s);
   2636 	raidPtr->copyback_in_progress = 0;
   2637 
   2638 	/* That's all... */
   2639 	kthread_exit(0);        /* does not return */
   2640 }
   2641 
   2642 
   2643 void
   2644 rf_ReconstructInPlaceThread(req)
   2645 	struct rf_recon_req *req;
   2646 {
   2647 	int retcode;
   2648 	int s;
   2649 	RF_Raid_t *raidPtr;
   2650 
   2651 	s = splbio();
   2652 	raidPtr = req->raidPtr;
   2653 	raidPtr->recon_in_progress = 1;
   2654 	retcode = rf_ReconstructInPlace(raidPtr, req->row, req->col);
   2655 	RF_Free(req, sizeof(*req));
   2656 	raidPtr->recon_in_progress = 0;
   2657 	splx(s);
   2658 
   2659 	/* That's all... */
   2660 	kthread_exit(0);        /* does not return */
   2661 }
   2662 
   2663 RF_AutoConfig_t *
   2664 rf_find_raid_components()
   2665 {
   2666 	struct vnode *vp;
   2667 	struct disklabel label;
   2668 	struct device *dv;
   2669 	dev_t dev;
   2670 	int bmajor;
   2671 	int error;
   2672 	int i;
   2673 	int good_one;
   2674 	RF_ComponentLabel_t *clabel;
   2675 	RF_AutoConfig_t *ac_list;
   2676 	RF_AutoConfig_t *ac;
   2677 
   2678 
   2679 	/* initialize the AutoConfig list */
   2680 	ac_list = NULL;
   2681 
   2682 	/* we begin by trolling through *all* the devices on the system */
   2683 
   2684 	for (dv = alldevs.tqh_first; dv != NULL;
   2685 	     dv = dv->dv_list.tqe_next) {
   2686 
   2687 		/* we are only interested in disks... */
   2688 		if (dv->dv_class != DV_DISK)
   2689 			continue;
   2690 
   2691 		/* we don't care about floppies... */
   2692 		if (!strcmp(dv->dv_cfdata->cf_name,"fd")) {
   2693 			continue;
   2694 		}
   2695 
   2696 		/* we don't care about CD's... */
   2697 		if (!strcmp(dv->dv_cfdata->cf_name,"cd")) {
   2698 			continue;
   2699 		}
   2700 
   2701 		/* hdfd is the Atari/Hades floppy driver */
   2702 		if (!strcmp(dv->dv_cfdata->cf_name,"hdfd")) {
   2703 			continue;
   2704 		}
   2705 		/* fdisa is the Atari/Milan floppy driver */
   2706 		if (!strcmp(dv->dv_cfdata->cf_name,"fdisa")) {
   2707 			continue;
   2708 		}
   2709 
   2710 		/* need to find the device_name_to_block_device_major stuff */
   2711 		bmajor = devsw_name2blk(dv->dv_xname, NULL, 0);
   2712 
   2713 		/* get a vnode for the raw partition of this disk */
   2714 
   2715 		dev = MAKEDISKDEV(bmajor, dv->dv_unit, RAW_PART);
   2716 		if (bdevvp(dev, &vp))
   2717 			panic("RAID can't alloc vnode");
   2718 
   2719 		error = VOP_OPEN(vp, FREAD, NOCRED, 0);
   2720 
   2721 		if (error) {
   2722 			/* "Who cares."  Continue looking
   2723 			   for something that exists*/
   2724 			vput(vp);
   2725 			continue;
   2726 		}
   2727 
   2728 		/* Ok, the disk exists.  Go get the disklabel. */
   2729 		error = VOP_IOCTL(vp, DIOCGDINFO, (caddr_t)&label,
   2730 				  FREAD, NOCRED, 0);
   2731 		if (error) {
   2732 			/*
   2733 			 * XXX can't happen - open() would
   2734 			 * have errored out (or faked up one)
   2735 			 */
   2736 			printf("can't get label for dev %s%c (%d)!?!?\n",
   2737 			       dv->dv_xname, 'a' + RAW_PART, error);
   2738 		}
   2739 
   2740 		/* don't need this any more.  We'll allocate it again
   2741 		   a little later if we really do... */
   2742 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
   2743 		VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
   2744 		vput(vp);
   2745 
   2746 		for (i=0; i < label.d_npartitions; i++) {
   2747 			/* We only support partitions marked as RAID */
   2748 			if (label.d_partitions[i].p_fstype != FS_RAID)
   2749 				continue;
   2750 
   2751 			dev = MAKEDISKDEV(bmajor, dv->dv_unit, i);
   2752 			if (bdevvp(dev, &vp))
   2753 				panic("RAID can't alloc vnode");
   2754 
   2755 			error = VOP_OPEN(vp, FREAD, NOCRED, 0);
   2756 			if (error) {
   2757 				/* Whatever... */
   2758 				vput(vp);
   2759 				continue;
   2760 			}
   2761 
   2762 			good_one = 0;
   2763 
   2764 			clabel = (RF_ComponentLabel_t *)
   2765 				malloc(sizeof(RF_ComponentLabel_t),
   2766 				       M_RAIDFRAME, M_NOWAIT);
   2767 			if (clabel == NULL) {
   2768 				/* XXX CLEANUP HERE */
   2769 				printf("RAID auto config: out of memory!\n");
   2770 				return(NULL); /* XXX probably should panic? */
   2771 			}
   2772 
   2773 			if (!raidread_component_label(dev, vp, clabel)) {
   2774 				/* Got the label.  Does it look reasonable? */
   2775 				if (rf_reasonable_label(clabel) &&
   2776 				    (clabel->partitionSize <=
   2777 				     label.d_partitions[i].p_size)) {
   2778 #if DEBUG
   2779 					printf("Component on: %s%c: %d\n",
   2780 					       dv->dv_xname, 'a'+i,
   2781 					       label.d_partitions[i].p_size);
   2782 					rf_print_component_label(clabel);
   2783 #endif
   2784 					/* if it's reasonable, add it,
   2785 					   else ignore it. */
   2786 					ac = (RF_AutoConfig_t *)
   2787 						malloc(sizeof(RF_AutoConfig_t),
   2788 						       M_RAIDFRAME,
   2789 						       M_NOWAIT);
   2790 					if (ac == NULL) {
   2791 						/* XXX should panic?? */
   2792 						return(NULL);
   2793 					}
   2794 
   2795 					sprintf(ac->devname, "%s%c",
   2796 						dv->dv_xname, 'a'+i);
   2797 					ac->dev = dev;
   2798 					ac->vp = vp;
   2799 					ac->clabel = clabel;
   2800 					ac->next = ac_list;
   2801 					ac_list = ac;
   2802 					good_one = 1;
   2803 				}
   2804 			}
   2805 			if (!good_one) {
   2806 				/* cleanup */
   2807 				free(clabel, M_RAIDFRAME);
   2808 				vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
   2809 				VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
   2810 				vput(vp);
   2811 			}
   2812 		}
   2813 	}
   2814 	return(ac_list);
   2815 }
   2816 
   2817 static int
   2818 rf_reasonable_label(clabel)
   2819 	RF_ComponentLabel_t *clabel;
   2820 {
   2821 
   2822 	if (((clabel->version==RF_COMPONENT_LABEL_VERSION_1) ||
   2823 	     (clabel->version==RF_COMPONENT_LABEL_VERSION)) &&
   2824 	    ((clabel->clean == RF_RAID_CLEAN) ||
   2825 	     (clabel->clean == RF_RAID_DIRTY)) &&
   2826 	    clabel->row >=0 &&
   2827 	    clabel->column >= 0 &&
   2828 	    clabel->num_rows > 0 &&
   2829 	    clabel->num_columns > 0 &&
   2830 	    clabel->row < clabel->num_rows &&
   2831 	    clabel->column < clabel->num_columns &&
   2832 	    clabel->blockSize > 0 &&
   2833 	    clabel->numBlocks > 0) {
   2834 		/* label looks reasonable enough... */
   2835 		return(1);
   2836 	}
   2837 	return(0);
   2838 }
   2839 
   2840 
#if DEBUG
/*
 * Print the fields of a component label, one group of related fields
 * per output line.  Only built when DEBUG is enabled.
 */
void
rf_print_component_label(clabel)
	RF_ComponentLabel_t *clabel;
{
	const char *clean_str;
	const char *autoconf_str;
	const char *root_str;

	/* Render the boolean-ish fields as words up front. */
	clean_str = clabel->clean ? "Yes" : "No";
	autoconf_str = clabel->autoconfigure ? "Yes" : "No";
	root_str = clabel->root_partition ? "Yes" : "No";

	/* Position within the array, and the array geometry. */
	printf("   Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
	       clabel->row, clabel->column,
	       clabel->num_rows, clabel->num_columns);

	/* Identity of the set and how recently it was updated. */
	printf("   Version: %d Serial Number: %d Mod Counter: %d\n",
	       clabel->version, clabel->serial_number,
	       clabel->mod_counter);
	printf("   Clean: %s Status: %d\n", clean_str, clabel->status);

	/* Layout parameters. */
	printf("   sectPerSU: %d SUsPerPU: %d SUsPerRU: %d\n",
	       clabel->sectPerSU, clabel->SUsPerPU, clabel->SUsPerRU);
	printf("   RAID Level: %c  blocksize: %d numBlocks: %d\n",
	       (char) clabel->parityConfig, clabel->blockSize,
	       clabel->numBlocks);

	/* Autoconfiguration settings. */
	printf("   Autoconfig: %s\n", autoconf_str);
	printf("   Contains root partition: %s\n", root_str);
	printf("   Last configured as: raid%d\n", clabel->last_unit);
#if 0
	   printf("   Config order: %d\n", clabel->config_order);
#endif

}
#endif
   2869 
   2870 RF_ConfigSet_t *
   2871 rf_create_auto_sets(ac_list)
   2872 	RF_AutoConfig_t *ac_list;
   2873 {
   2874 	RF_AutoConfig_t *ac;
   2875 	RF_ConfigSet_t *config_sets;
   2876 	RF_ConfigSet_t *cset;
   2877 	RF_AutoConfig_t *ac_next;
   2878 
   2879 
   2880 	config_sets = NULL;
   2881 
   2882 	/* Go through the AutoConfig list, and figure out which components
   2883 	   belong to what sets.  */
   2884 	ac = ac_list;
   2885 	while(ac!=NULL) {
   2886 		/* we're going to putz with ac->next, so save it here
   2887 		   for use at the end of the loop */
   2888 		ac_next = ac->next;
   2889 
   2890 		if (config_sets == NULL) {
   2891 			/* will need at least this one... */
   2892 			config_sets = (RF_ConfigSet_t *)
   2893 				malloc(sizeof(RF_ConfigSet_t),
   2894 				       M_RAIDFRAME, M_NOWAIT);
   2895 			if (config_sets == NULL) {
   2896 				panic("rf_create_auto_sets: No memory!");
   2897 			}
   2898 			/* this one is easy :) */
   2899 			config_sets->ac = ac;
   2900 			config_sets->next = NULL;
   2901 			config_sets->rootable = 0;
   2902 			ac->next = NULL;
   2903 		} else {
   2904 			/* which set does this component fit into? */
   2905 			cset = config_sets;
   2906 			while(cset!=NULL) {
   2907 				if (rf_does_it_fit(cset, ac)) {
   2908 					/* looks like it matches... */
   2909 					ac->next = cset->ac;
   2910 					cset->ac = ac;
   2911 					break;
   2912 				}
   2913 				cset = cset->next;
   2914 			}
   2915 			if (cset==NULL) {
   2916 				/* didn't find a match above... new set..*/
   2917 				cset = (RF_ConfigSet_t *)
   2918 					malloc(sizeof(RF_ConfigSet_t),
   2919 					       M_RAIDFRAME, M_NOWAIT);
   2920 				if (cset == NULL) {
   2921 					panic("rf_create_auto_sets: No memory!");
   2922 				}
   2923 				cset->ac = ac;
   2924 				ac->next = NULL;
   2925 				cset->next = config_sets;
   2926 				cset->rootable = 0;
   2927 				config_sets = cset;
   2928 			}
   2929 		}
   2930 		ac = ac_next;
   2931 	}
   2932 
   2933 
   2934 	return(config_sets);
   2935 }
   2936 
   2937 static int
   2938 rf_does_it_fit(cset, ac)
   2939 	RF_ConfigSet_t *cset;
   2940 	RF_AutoConfig_t *ac;
   2941 {
   2942 	RF_ComponentLabel_t *clabel1, *clabel2;
   2943 
   2944 	/* If this one matches the *first* one in the set, that's good
   2945 	   enough, since the other members of the set would have been
   2946 	   through here too... */
   2947 	/* note that we are not checking partitionSize here..
   2948 
   2949 	   Note that we are also not checking the mod_counters here.
   2950 	   If everything else matches execpt the mod_counter, that's
   2951 	   good enough for this test.  We will deal with the mod_counters
   2952 	   a little later in the autoconfiguration process.
   2953 
   2954 	    (clabel1->mod_counter == clabel2->mod_counter) &&
   2955 
   2956 	   The reason we don't check for this is that failed disks
   2957 	   will have lower modification counts.  If those disks are
   2958 	   not added to the set they used to belong to, then they will
   2959 	   form their own set, which may result in 2 different sets,
   2960 	   for example, competing to be configured at raid0, and
   2961 	   perhaps competing to be the root filesystem set.  If the
   2962 	   wrong ones get configured, or both attempt to become /,
   2963 	   weird behaviour and or serious lossage will occur.  Thus we
   2964 	   need to bring them into the fold here, and kick them out at
   2965 	   a later point.
   2966 
   2967 	*/
   2968 
   2969 	clabel1 = cset->ac->clabel;
   2970 	clabel2 = ac->clabel;
   2971 	if ((clabel1->version == clabel2->version) &&
   2972 	    (clabel1->serial_number == clabel2->serial_number) &&
   2973 	    (clabel1->num_rows == clabel2->num_rows) &&
   2974 	    (clabel1->num_columns == clabel2->num_columns) &&
   2975 	    (clabel1->sectPerSU == clabel2->sectPerSU) &&
   2976 	    (clabel1->SUsPerPU == clabel2->SUsPerPU) &&
   2977 	    (clabel1->SUsPerRU == clabel2->SUsPerRU) &&
   2978 	    (clabel1->parityConfig == clabel2->parityConfig) &&
   2979 	    (clabel1->maxOutstanding == clabel2->maxOutstanding) &&
   2980 	    (clabel1->blockSize == clabel2->blockSize) &&
   2981 	    (clabel1->numBlocks == clabel2->numBlocks) &&
   2982 	    (clabel1->autoconfigure == clabel2->autoconfigure) &&
   2983 	    (clabel1->root_partition == clabel2->root_partition) &&
   2984 	    (clabel1->last_unit == clabel2->last_unit) &&
   2985 	    (clabel1->config_order == clabel2->config_order)) {
   2986 		/* if it get's here, it almost *has* to be a match */
   2987 	} else {
   2988 		/* it's not consistent with somebody in the set..
   2989 		   punt */
   2990 		return(0);
   2991 	}
   2992 	/* all was fine.. it must fit... */
   2993 	return(1);
   2994 }
   2995 
   2996 int
   2997 rf_have_enough_components(cset)
   2998 	RF_ConfigSet_t *cset;
   2999 {
   3000 	RF_AutoConfig_t *ac;
   3001 	RF_AutoConfig_t *auto_config;
   3002 	RF_ComponentLabel_t *clabel;
   3003 	int r,c;
   3004 	int num_rows;
   3005 	int num_cols;
   3006 	int num_missing;
   3007 	int mod_counter;
   3008 	int mod_counter_found;
   3009 	int even_pair_failed;
   3010 	char parity_type;
   3011 
   3012 
   3013 	/* check to see that we have enough 'live' components
   3014 	   of this set.  If so, we can configure it if necessary */
   3015 
   3016 	num_rows = cset->ac->clabel->num_rows;
   3017 	num_cols = cset->ac->clabel->num_columns;
   3018 	parity_type = cset->ac->clabel->parityConfig;
   3019 
   3020 	/* XXX Check for duplicate components!?!?!? */
   3021 
   3022 	/* Determine what the mod_counter is supposed to be for this set. */
   3023 
   3024 	mod_counter_found = 0;
   3025 	mod_counter = 0;
   3026 	ac = cset->ac;
   3027 	while(ac!=NULL) {
   3028 		if (mod_counter_found==0) {
   3029 			mod_counter = ac->clabel->mod_counter;
   3030 			mod_counter_found = 1;
   3031 		} else {
   3032 			if (ac->clabel->mod_counter > mod_counter) {
   3033 				mod_counter = ac->clabel->mod_counter;
   3034 			}
   3035 		}
   3036 		ac = ac->next;
   3037 	}
   3038 
   3039 	num_missing = 0;
   3040 	auto_config = cset->ac;
   3041 
   3042 	for(r=0; r<num_rows; r++) {
   3043 		even_pair_failed = 0;
   3044 		for(c=0; c<num_cols; c++) {
   3045 			ac = auto_config;
   3046 			while(ac!=NULL) {
   3047 				if ((ac->clabel->row == r) &&
   3048 				    (ac->clabel->column == c) &&
   3049 				    (ac->clabel->mod_counter == mod_counter)) {
   3050 					/* it's this one... */
   3051 #if DEBUG
   3052 					printf("Found: %s at %d,%d\n",
   3053 					       ac->devname,r,c);
   3054 #endif
   3055 					break;
   3056 				}
   3057 				ac=ac->next;
   3058 			}
   3059 			if (ac==NULL) {
   3060 				/* Didn't find one here! */
   3061 				/* special case for RAID 1, especially
   3062 				   where there are more than 2
   3063 				   components (where RAIDframe treats
   3064 				   things a little differently :( ) */
   3065 				if (parity_type == '1') {
   3066 					if (c%2 == 0) { /* even component */
   3067 						even_pair_failed = 1;
   3068 					} else { /* odd component.  If
   3069                                                     we're failed, and
   3070                                                     so is the even
   3071                                                     component, it's
   3072                                                     "Good Night, Charlie" */
   3073 						if (even_pair_failed == 1) {
   3074 							return(0);
   3075 						}
   3076 					}
   3077 				} else {
   3078 					/* normal accounting */
   3079 					num_missing++;
   3080 				}
   3081 			}
   3082 			if ((parity_type == '1') && (c%2 == 1)) {
   3083 				/* Just did an even component, and we didn't
   3084 				   bail.. reset the even_pair_failed flag,
   3085 				   and go on to the next component.... */
   3086 				even_pair_failed = 0;
   3087 			}
   3088 		}
   3089 	}
   3090 
   3091 	clabel = cset->ac->clabel;
   3092 
   3093 	if (((clabel->parityConfig == '0') && (num_missing > 0)) ||
   3094 	    ((clabel->parityConfig == '4') && (num_missing > 1)) ||
   3095 	    ((clabel->parityConfig == '5') && (num_missing > 1))) {
   3096 		/* XXX this needs to be made *much* more general */
   3097 		/* Too many failures */
   3098 		return(0);
   3099 	}
   3100 	/* otherwise, all is well, and we've got enough to take a kick
   3101 	   at autoconfiguring this set */
   3102 	return(1);
   3103 }
   3104 
   3105 void
   3106 rf_create_configuration(ac,config,raidPtr)
   3107 	RF_AutoConfig_t *ac;
   3108 	RF_Config_t *config;
   3109 	RF_Raid_t *raidPtr;
   3110 {
   3111 	RF_ComponentLabel_t *clabel;
   3112 	int i;
   3113 
   3114 	clabel = ac->clabel;
   3115 
   3116 	/* 1. Fill in the common stuff */
   3117 	config->numRow = clabel->num_rows;
   3118 	config->numCol = clabel->num_columns;
   3119 	config->numSpare = 0; /* XXX should this be set here? */
   3120 	config->sectPerSU = clabel->sectPerSU;
   3121 	config->SUsPerPU = clabel->SUsPerPU;
   3122 	config->SUsPerRU = clabel->SUsPerRU;
   3123 	config->parityConfig = clabel->parityConfig;
   3124 	/* XXX... */
   3125 	strcpy(config->diskQueueType,"fifo");
   3126 	config->maxOutstandingDiskReqs = clabel->maxOutstanding;
   3127 	config->layoutSpecificSize = 0; /* XXX ?? */
   3128 
   3129 	while(ac!=NULL) {
   3130 		/* row/col values will be in range due to the checks
   3131 		   in reasonable_label() */
   3132 		strcpy(config->devnames[ac->clabel->row][ac->clabel->column],
   3133 		       ac->devname);
   3134 		ac = ac->next;
   3135 	}
   3136 
   3137 	for(i=0;i<RF_MAXDBGV;i++) {
   3138 		config->debugVars[i][0] = NULL;
   3139 	}
   3140 }
   3141 
   3142 int
   3143 rf_set_autoconfig(raidPtr, new_value)
   3144 	RF_Raid_t *raidPtr;
   3145 	int new_value;
   3146 {
   3147 	RF_ComponentLabel_t clabel;
   3148 	struct vnode *vp;
   3149 	dev_t dev;
   3150 	int row, column;
   3151 
   3152 	raidPtr->autoconfigure = new_value;
   3153 	for(row=0; row<raidPtr->numRow; row++) {
   3154 		for(column=0; column<raidPtr->numCol; column++) {
   3155 			if (raidPtr->Disks[row][column].status ==
   3156 			    rf_ds_optimal) {
   3157 				dev = raidPtr->Disks[row][column].dev;
   3158 				vp = raidPtr->raid_cinfo[row][column].ci_vp;
   3159 				raidread_component_label(dev, vp, &clabel);
   3160 				clabel.autoconfigure = new_value;
   3161 				raidwrite_component_label(dev, vp, &clabel);
   3162 			}
   3163 		}
   3164 	}
   3165 	return(new_value);
   3166 }
   3167 
   3168 int
   3169 rf_set_rootpartition(raidPtr, new_value)
   3170 	RF_Raid_t *raidPtr;
   3171 	int new_value;
   3172 {
   3173 	RF_ComponentLabel_t clabel;
   3174 	struct vnode *vp;
   3175 	dev_t dev;
   3176 	int row, column;
   3177 
   3178 	raidPtr->root_partition = new_value;
   3179 	for(row=0; row<raidPtr->numRow; row++) {
   3180 		for(column=0; column<raidPtr->numCol; column++) {
   3181 			if (raidPtr->Disks[row][column].status ==
   3182 			    rf_ds_optimal) {
   3183 				dev = raidPtr->Disks[row][column].dev;
   3184 				vp = raidPtr->raid_cinfo[row][column].ci_vp;
   3185 				raidread_component_label(dev, vp, &clabel);
   3186 				clabel.root_partition = new_value;
   3187 				raidwrite_component_label(dev, vp, &clabel);
   3188 			}
   3189 		}
   3190 	}
   3191 	return(new_value);
   3192 }
   3193 
   3194 void
   3195 rf_release_all_vps(cset)
   3196 	RF_ConfigSet_t *cset;
   3197 {
   3198 	RF_AutoConfig_t *ac;
   3199 
   3200 	ac = cset->ac;
   3201 	while(ac!=NULL) {
   3202 		/* Close the vp, and give it back */
   3203 		if (ac->vp) {
   3204 			vn_lock(ac->vp, LK_EXCLUSIVE | LK_RETRY);
   3205 			VOP_CLOSE(ac->vp, FREAD, NOCRED, 0);
   3206 			vput(ac->vp);
   3207 			ac->vp = NULL;
   3208 		}
   3209 		ac = ac->next;
   3210 	}
   3211 }
   3212 
   3213 
   3214 void
   3215 rf_cleanup_config_set(cset)
   3216 	RF_ConfigSet_t *cset;
   3217 {
   3218 	RF_AutoConfig_t *ac;
   3219 	RF_AutoConfig_t *next_ac;
   3220 
   3221 	ac = cset->ac;
   3222 	while(ac!=NULL) {
   3223 		next_ac = ac->next;
   3224 		/* nuke the label */
   3225 		free(ac->clabel, M_RAIDFRAME);
   3226 		/* cleanup the config structure */
   3227 		free(ac, M_RAIDFRAME);
   3228 		/* "next.." */
   3229 		ac = next_ac;
   3230 	}
   3231 	/* and, finally, nuke the config set */
   3232 	free(cset, M_RAIDFRAME);
   3233 }
   3234 
   3235 
   3236 void
   3237 raid_init_component_label(raidPtr, clabel)
   3238 	RF_Raid_t *raidPtr;
   3239 	RF_ComponentLabel_t *clabel;
   3240 {
   3241 	/* current version number */
   3242 	clabel->version = RF_COMPONENT_LABEL_VERSION;
   3243 	clabel->serial_number = raidPtr->serial_number;
   3244 	clabel->mod_counter = raidPtr->mod_counter;
   3245 	clabel->num_rows = raidPtr->numRow;
   3246 	clabel->num_columns = raidPtr->numCol;
   3247 	clabel->clean = RF_RAID_DIRTY; /* not clean */
   3248 	clabel->status = rf_ds_optimal; /* "It's good!" */
   3249 
   3250 	clabel->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit;
   3251 	clabel->SUsPerPU = raidPtr->Layout.SUsPerPU;
   3252 	clabel->SUsPerRU = raidPtr->Layout.SUsPerRU;
   3253 
   3254 	clabel->blockSize = raidPtr->bytesPerSector;
   3255 	clabel->numBlocks = raidPtr->sectorsPerDisk;
   3256 
   3257 	/* XXX not portable */
   3258 	clabel->parityConfig = raidPtr->Layout.map->parityConfig;
   3259 	clabel->maxOutstanding = raidPtr->maxOutstanding;
   3260 	clabel->autoconfigure = raidPtr->autoconfigure;
   3261 	clabel->root_partition = raidPtr->root_partition;
   3262 	clabel->last_unit = raidPtr->raidid;
   3263 	clabel->config_order = raidPtr->config_order;
   3264 }
   3265 
   3266 int
   3267 rf_auto_config_set(cset,unit)
   3268 	RF_ConfigSet_t *cset;
   3269 	int *unit;
   3270 {
   3271 	RF_Raid_t *raidPtr;
   3272 	RF_Config_t *config;
   3273 	int raidID;
   3274 	int retcode;
   3275 
   3276 #if DEBUG
   3277 	printf("RAID autoconfigure\n");
   3278 #endif
   3279 
   3280 	retcode = 0;
   3281 	*unit = -1;
   3282 
   3283 	/* 1. Create a config structure */
   3284 
   3285 	config = (RF_Config_t *)malloc(sizeof(RF_Config_t),
   3286 				       M_RAIDFRAME,
   3287 				       M_NOWAIT);
   3288 	if (config==NULL) {
   3289 		printf("Out of mem!?!?\n");
   3290 				/* XXX do something more intelligent here. */
   3291 		return(1);
   3292 	}
   3293 
   3294 	memset(config, 0, sizeof(RF_Config_t));
   3295 
   3296 	/*
   3297 	   2. Figure out what RAID ID this one is supposed to live at
   3298 	   See if we can get the same RAID dev that it was configured
   3299 	   on last time..
   3300 	*/
   3301 
   3302 	raidID = cset->ac->clabel->last_unit;
   3303 	if ((raidID < 0) || (raidID >= numraid)) {
   3304 		/* let's not wander off into lala land. */
   3305 		raidID = numraid - 1;
   3306 	}
   3307 	if (raidPtrs[raidID]->valid != 0) {
   3308 
   3309 		/*
   3310 		   Nope... Go looking for an alternative...
   3311 		   Start high so we don't immediately use raid0 if that's
   3312 		   not taken.
   3313 		*/
   3314 
   3315 		for(raidID = numraid - 1; raidID >= 0; raidID--) {
   3316 			if (raidPtrs[raidID]->valid == 0) {
   3317 				/* can use this one! */
   3318 				break;
   3319 			}
   3320 		}
   3321 	}
   3322 
   3323 	if (raidID < 0) {
   3324 		/* punt... */
   3325 		printf("Unable to auto configure this set!\n");
   3326 		printf("(Out of RAID devs!)\n");
   3327 		return(1);
   3328 	}
   3329 
   3330 #if DEBUG
   3331 	printf("Configuring raid%d:\n",raidID);
   3332 #endif
   3333 
   3334 	raidPtr = raidPtrs[raidID];
   3335 
   3336 	/* XXX all this stuff should be done SOMEWHERE ELSE! */
   3337 	raidPtr->raidid = raidID;
   3338 	raidPtr->openings = RAIDOUTSTANDING;
   3339 
   3340 	/* 3. Build the configuration structure */
   3341 	rf_create_configuration(cset->ac, config, raidPtr);
   3342 
   3343 	/* 4. Do the configuration */
   3344 	retcode = rf_Configure(raidPtr, config, cset->ac);
   3345 
   3346 	if (retcode == 0) {
   3347 
   3348 		raidinit(raidPtrs[raidID]);
   3349 
   3350 		rf_markalldirty(raidPtrs[raidID]);
   3351 		raidPtrs[raidID]->autoconfigure = 1; /* XXX do this here? */
   3352 		if (cset->ac->clabel->root_partition==1) {
   3353 			/* everything configured just fine.  Make a note
   3354 			   that this set is eligible to be root. */
   3355 			cset->rootable = 1;
   3356 			/* XXX do this here? */
   3357 			raidPtrs[raidID]->root_partition = 1;
   3358 		}
   3359 	}
   3360 
   3361 	/* 5. Cleanup */
   3362 	free(config, M_RAIDFRAME);
   3363 
   3364 	*unit = raidID;
   3365 	return(retcode);
   3366 }
   3367 
   3368 void
   3369 rf_disk_unbusy(desc)
   3370 	RF_RaidAccessDesc_t *desc;
   3371 {
   3372 	struct buf *bp;
   3373 
   3374 	bp = (struct buf *)desc->bp;
   3375 	disk_unbusy(&raid_softc[desc->raidPtr->raidid].sc_dkdev,
   3376 	    (bp->b_bcount - bp->b_resid), (bp->b_flags & B_READ));
   3377 }
   3378